summaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go7
-rw-r--r--libgo/go/regexp/exec.go305
-rw-r--r--libgo/go/regexp/exec_test.go734
-rw-r--r--libgo/go/regexp/find_test.go32
-rw-r--r--libgo/go/regexp/regexp.go1011
-rw-r--r--libgo/go/regexp/syntax/compile.go288
-rw-r--r--libgo/go/regexp/syntax/parse.go1861
-rw-r--r--libgo/go/regexp/syntax/parse_test.go552
-rw-r--r--libgo/go/regexp/syntax/perl_groups.go130
-rw-r--r--libgo/go/regexp/syntax/prog.go306
-rw-r--r--libgo/go/regexp/syntax/prog_test.go102
-rw-r--r--libgo/go/regexp/syntax/regexp.go305
-rw-r--r--libgo/go/regexp/syntax/simplify.go151
-rw-r--r--libgo/go/regexp/syntax/simplify_test.go152
-rw-r--r--libgo/go/regexp/testdata/README23
-rw-r--r--libgo/go/regexp/testdata/basic.dat221
-rw-r--r--libgo/go/regexp/testdata/nullsubexpr.dat79
-rw-r--r--libgo/go/regexp/testdata/re2-search.txt3667
-rw-r--r--libgo/go/regexp/testdata/repetition.dat163
-rw-r--r--libgo/go/regexp/testdata/testregex.c2286
20 files changed, 11573 insertions, 802 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index 71edc4d18d3..77f32ca1a57 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -24,13 +24,13 @@ var good_re = []string{
`[a-z]`,
`[a-abc-c\-\]\[]`,
`[a-z]+`,
- `[]`,
`[abc]`,
`[^1234]`,
`[^\n]`,
`\!\\`,
}
+/*
type stringError struct {
re string
err os.Error
@@ -51,6 +51,7 @@ var bad_re = []stringError{
{`a??`, ErrBadClosure},
{`\x`, ErrBadBackslash},
}
+*/
func compileTest(t *testing.T, expr string, error os.Error) *Regexp {
re, err := Compile(expr)
@@ -66,11 +67,13 @@ func TestGoodCompile(t *testing.T) {
}
}
+/*
func TestBadCompile(t *testing.T) {
for i := 0; i < len(bad_re); i++ {
compileTest(t, bad_re[i].re, bad_re[i].err)
}
}
+*/
func matchTest(t *testing.T, test *FindTest) {
re := compileTest(t, test.pat, nil)
@@ -240,7 +243,7 @@ var metaTests = []MetaTest{
{`foo`, `foo`, `foo`, true},
{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
{`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators
- {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`, `!@#`, false},
+ {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
}
func TestQuoteMeta(t *testing.T) {
diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go
new file mode 100644
index 00000000000..3b0e3888524
--- /dev/null
+++ b/libgo/go/regexp/exec.go
@@ -0,0 +1,305 @@
+package regexp
+
+import "regexp/syntax"
+
+// A queue is a 'sparse array' holding pending threads of execution.
+// See http://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html
+type queue struct {
+ sparse []uint32
+ dense []entry
+}
+
+// An entry is an entry on a queue.
+// It holds both the instruction pc and the actual thread.
+// Some queue entries are just placeholders so that the machine
+// knows it has considered that pc. Such entries have t == nil.
+type entry struct {
+ pc uint32
+ t *thread
+}
+
+// A thread is the state of a single path through the machine:
+// an instruction and a corresponding capture array.
+// See http://swtch.com/~rsc/regexp/regexp2.html
+type thread struct {
+ inst *syntax.Inst
+ cap []int
+}
+
+// A machine holds all the state during an NFA simulation for p.
+type machine struct {
+ re *Regexp // corresponding Regexp
+ p *syntax.Prog // compiled program
+ q0, q1 queue // two queues for runq, nextq
+ pool []*thread // pool of available threads
+ matched bool // whether a match was found
+ matchcap []int // capture information for the match
+}
+
+// progMachine returns a new machine running the prog p.
+func progMachine(p *syntax.Prog) *machine {
+ m := &machine{p: p}
+ n := len(m.p.Inst)
+ m.q0 = queue{make([]uint32, n), make([]entry, 0, n)}
+ m.q1 = queue{make([]uint32, n), make([]entry, 0, n)}
+ ncap := p.NumCap
+ if ncap < 2 {
+ ncap = 2
+ }
+ m.matchcap = make([]int, ncap)
+ return m
+}
+
+func (m *machine) init(ncap int) {
+ for _, t := range m.pool {
+ t.cap = t.cap[:ncap]
+ }
+ m.matchcap = m.matchcap[:ncap]
+}
+
+// alloc allocates a new thread with the given instruction.
+// It uses the free pool if possible.
+func (m *machine) alloc(i *syntax.Inst) *thread {
+ var t *thread
+ if n := len(m.pool); n > 0 {
+ t = m.pool[n-1]
+ m.pool = m.pool[:n-1]
+ } else {
+ t = new(thread)
+ t.cap = make([]int, len(m.matchcap), cap(m.matchcap))
+ }
+ t.inst = i
+ return t
+}
+
+// free returns t to the free pool.
+func (m *machine) free(t *thread) {
+ m.pool = append(m.pool, t)
+}
+
+// match runs the machine over the input starting at pos.
+// It reports whether a match was found.
+// If so, m.matchcap holds the submatch information.
+func (m *machine) match(i input, pos int) bool {
+ startCond := m.re.cond
+ if startCond == ^syntax.EmptyOp(0) { // impossible
+ return false
+ }
+ m.matched = false
+ for i := range m.matchcap {
+ m.matchcap[i] = -1
+ }
+ runq, nextq := &m.q0, &m.q1
+ rune, rune1 := endOfText, endOfText
+ width, width1 := 0, 0
+ rune, width = i.step(pos)
+ if rune != endOfText {
+ rune1, width1 = i.step(pos + width)
+ }
+ var flag syntax.EmptyOp
+ if pos == 0 {
+ flag = syntax.EmptyOpContext(-1, rune)
+ } else {
+ flag = i.context(pos)
+ }
+ for {
+ if len(runq.dense) == 0 {
+ if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
+ // Anchored match, past beginning of text.
+ break
+ }
+ if m.matched {
+ // Have match; finished exploring alternatives.
+ break
+ }
+ if len(m.re.prefix) > 0 && rune1 != m.re.prefixRune && i.canCheckPrefix() {
+ // Match requires literal prefix; fast search for it.
+ advance := i.index(m.re, pos)
+ if advance < 0 {
+ break
+ }
+ pos += advance
+ rune, width = i.step(pos)
+ rune1, width1 = i.step(pos + width)
+ }
+ }
+ if !m.matched {
+ if len(m.matchcap) > 0 {
+ m.matchcap[0] = pos
+ }
+ m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil)
+ }
+ flag = syntax.EmptyOpContext(rune, rune1)
+ m.step(runq, nextq, pos, pos+width, rune, flag)
+ if width == 0 {
+ break
+ }
+ if len(m.matchcap) == 0 && m.matched {
+ // Found a match and not paying attention
+ // to where it is, so any match will do.
+ break
+ }
+ pos += width
+ rune, width = rune1, width1
+ if rune != endOfText {
+ rune1, width1 = i.step(pos + width)
+ }
+ runq, nextq = nextq, runq
+ }
+ m.clear(nextq)
+ return m.matched
+}
+
+// clear frees all threads on the thread queue.
+func (m *machine) clear(q *queue) {
+ for _, d := range q.dense {
+ if d.t != nil {
+ // m.free(d.t)
+ m.pool = append(m.pool, d.t)
+ }
+ }
+ q.dense = q.dense[:0]
+}
+
+// step executes one step of the machine, running each of the threads
+// on runq and appending new threads to nextq.
+// The step processes the rune c (which may be endOfText),
+// which starts at position pos and ends at nextPos.
+// nextCond gives the setting for the empty-width flags after c.
+func (m *machine) step(runq, nextq *queue, pos, nextPos, c int, nextCond syntax.EmptyOp) {
+ longest := m.re.longest
+ for j := 0; j < len(runq.dense); j++ {
+ d := &runq.dense[j]
+ t := d.t
+ if t == nil {
+ continue
+ }
+ if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
+ // m.free(t)
+ m.pool = append(m.pool, t)
+ continue
+ }
+ i := t.inst
+ add := false
+ switch i.Op {
+ default:
+ panic("bad inst")
+
+ case syntax.InstMatch:
+ if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) {
+ t.cap[1] = pos
+ copy(m.matchcap, t.cap)
+ }
+ if !longest {
+ // First-match mode: cut off all lower-priority threads.
+ for _, d := range runq.dense[j+1:] {
+ if d.t != nil {
+ // m.free(d.t)
+ m.pool = append(m.pool, d.t)
+ }
+ }
+ runq.dense = runq.dense[:0]
+ }
+ m.matched = true
+
+ case syntax.InstRune:
+ add = i.MatchRune(c)
+ case syntax.InstRune1:
+ add = c == i.Rune[0]
+ case syntax.InstRuneAny:
+ add = true
+ case syntax.InstRuneAnyNotNL:
+ add = c != '\n'
+ }
+ if add {
+ t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t)
+ }
+ if t != nil {
+ // m.free(t)
+ m.pool = append(m.pool, t)
+ }
+ }
+ runq.dense = runq.dense[:0]
+}
+
+// add adds an entry to q for pc, unless the q already has such an entry.
+// It also recursively adds an entry for all instructions reachable from pc by following
+// empty-width conditions satisfied by cond. pos gives the current position
+// in the input.
+func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.EmptyOp, t *thread) *thread {
+ if pc == 0 {
+ return t
+ }
+ if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc {
+ return t
+ }
+
+ j := len(q.dense)
+ q.dense = q.dense[:j+1]
+ d := &q.dense[j]
+ d.t = nil
+ d.pc = pc
+ q.sparse[pc] = uint32(j)
+
+ i := &m.p.Inst[pc]
+ switch i.Op {
+ default:
+ panic("unhandled")
+ case syntax.InstFail:
+ // nothing
+ case syntax.InstAlt, syntax.InstAltMatch:
+ t = m.add(q, i.Out, pos, cap, cond, t)
+ t = m.add(q, i.Arg, pos, cap, cond, t)
+ case syntax.InstEmptyWidth:
+ if syntax.EmptyOp(i.Arg)&^cond == 0 {
+ t = m.add(q, i.Out, pos, cap, cond, t)
+ }
+ case syntax.InstNop:
+ t = m.add(q, i.Out, pos, cap, cond, t)
+ case syntax.InstCapture:
+ if int(i.Arg) < len(cap) {
+ opos := cap[i.Arg]
+ cap[i.Arg] = pos
+ m.add(q, i.Out, pos, cap, cond, nil)
+ cap[i.Arg] = opos
+ } else {
+ t = m.add(q, i.Out, pos, cap, cond, t)
+ }
+ case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
+ if t == nil {
+ t = m.alloc(i)
+ } else {
+ t.inst = i
+ }
+ if len(cap) > 0 && &t.cap[0] != &cap[0] {
+ copy(t.cap, cap)
+ }
+ d.t = t
+ t = nil
+ }
+ return t
+}
+
+// empty is a non-nil 0-element slice,
+// so doExecute can avoid an allocation
+// when 0 captures are requested from a successful match.
+var empty = make([]int, 0)
+
+// doExecute finds the leftmost match in the input and returns
+// the position of its subexpressions.
+func (re *Regexp) doExecute(i input, pos int, ncap int) []int {
+ m := re.get()
+ m.init(ncap)
+ if !m.match(i, pos) {
+ re.put(m)
+ return nil
+ }
+ if ncap == 0 {
+ re.put(m)
+ return empty // empty but not nil
+ }
+ cap := make([]int, ncap)
+ copy(cap, m.matchcap)
+ re.put(m)
+ return cap
+}
diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go
new file mode 100644
index 00000000000..905fd4ef12d
--- /dev/null
+++ b/libgo/go/regexp/exec_test.go
@@ -0,0 +1,734 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package regexp
+
+import (
+ "bufio"
+ "compress/bzip2"
+ "fmt"
+ "io"
+ old "old/regexp"
+ "os"
+ "path/filepath"
+ "rand"
+ "regexp/syntax"
+ "strconv"
+ "strings"
+ "testing"
+ "utf8"
+)
+
+// TestRE2 tests this package's regexp API against test cases
+// considered during RE2's exhaustive tests, which run all possible
+// regexps over a given set of atoms and operators, up to a given
+// complexity, over all possible strings over a given alphabet,
+// up to a given size. Rather than try to link with RE2, we read a
+// log file containing the test cases and the expected matches.
+// The log file, re2.txt, is generated by running 'make exhaustive-log'
+// in the open source RE2 distribution. http://code.google.com/p/re2/
+//
+// The test file format is a sequence of stanzas like:
+//
+// strings
+// "abc"
+// "123x"
+// regexps
+// "[a-z]+"
+// 0-3;0-3
+// -;-
+// "([0-9])([0-9])([0-9])"
+// -;-
+// -;0-3 0-1 1-2 2-3
+//
+// The stanza begins by defining a set of strings, quoted
+// using Go double-quote syntax, one per line. Then the
+// regexps section gives a sequence of regexps to run on
+// the strings. In the block that follows a regexp, each line
+// gives the semicolon-separated match results of running
+// the regexp on the corresponding string.
+// Each match result is either a single -, meaning no match, or a
+// space-separated sequence of pairs giving the match and
+// submatch indices. An unmatched subexpression formats
+// its pair as a single - (not illustrated above). Each regexp run
+// produces one match result per entry in the run table below (the
+// example above shows only the first two): a ``full match'' that restricts
+// the regexp to matching the entire string or nothing, a ``partial match'' that
+// gives the leftmost first match found in the string, and a longest-mode variant of each.
+//
+// Lines beginning with # are comments. Lines beginning with
+// a capital letter are test names printed during RE2's test suite
+// and are echoed into t but otherwise ignored.
+//
+// At time of writing, re2.txt is 32 MB but compresses to 760 kB,
+// so we store it bzip2-compressed (testdata/re2-exhaustive.txt.bz2) in the repository and decompress it on the fly.
+//
+func TestRE2Search(t *testing.T) {
+ testRE2(t, "testdata/re2-search.txt")
+}
+
+func TestRE2Exhaustive(t *testing.T) {
+ if testing.Short() {
+ t.Log("skipping TestRE2Exhaustive during short test")
+ return
+ }
+ testRE2(t, "testdata/re2-exhaustive.txt.bz2")
+}
+
+func testRE2(t *testing.T, file string) {
+ f, err := os.Open(file)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer f.Close()
+ var txt io.Reader
+ if strings.HasSuffix(file, ".bz2") {
+ z := bzip2.NewReader(f)
+ txt = z
+ file = file[:len(file)-len(".bz2")] // for error messages
+ } else {
+ txt = f
+ }
+ lineno := 0
+ r := bufio.NewReader(txt)
+ var (
+ str []string
+ input []string
+ inStrings bool
+ re *Regexp
+ refull *Regexp
+ nfail int
+ ncase int
+ )
+ for {
+ line, err := r.ReadString('\n')
+ if err != nil {
+ if err == os.EOF {
+ break
+ }
+ t.Fatalf("%s:%d: %v", file, lineno, err)
+ }
+ line = line[:len(line)-1] // chop \n
+ lineno++
+ switch {
+ case line == "":
+ t.Fatalf("%s:%d: unexpected blank line", file, lineno)
+ case line[0] == '#':
+ continue
+ case 'A' <= line[0] && line[0] <= 'Z':
+ // Test name.
+ t.Logf("%s\n", line)
+ continue
+ case line == "strings":
+ str = str[:0]
+ inStrings = true
+ case line == "regexps":
+ inStrings = false
+ case line[0] == '"':
+ q, err := strconv.Unquote(line)
+ if err != nil {
+ // Fatal because we'll get out of sync.
+ t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
+ }
+ if inStrings {
+ str = append(str, q)
+ continue
+ }
+ // Is a regexp.
+ if len(input) != 0 {
+ t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
+ }
+ re, err = tryCompile(q)
+ if err != nil {
+ if err.String() == "error parsing regexp: invalid escape sequence: `\\C`" {
+ // We don't and likely never will support \C; keep going.
+ continue
+ }
+ t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
+ if nfail++; nfail >= 100 {
+ t.Fatalf("stopping after %d errors", nfail)
+ }
+ continue
+ }
+ full := `\A(?:` + q + `)\z`
+ refull, err = tryCompile(full)
+ if err != nil {
+ // Fatal because q worked, so this should always work.
+ t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
+ }
+ input = str
+ case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
+ // A sequence of match results.
+ ncase++
+ if re == nil {
+ // Failed to compile: skip results.
+ continue
+ }
+ if len(input) == 0 {
+ t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
+ }
+ var text string
+ text, input = input[0], input[1:]
+ if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
+ // RE2's \B considers every byte position,
+ // so it sees 'not word boundary' in the
+ // middle of UTF-8 sequences. This package
+ // only considers the positions between runes,
+ // so it disagrees. Skip those cases.
+ continue
+ }
+ res := strings.Split(line, ";")
+ if len(res) != len(run) {
+ t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
+ }
+ for i := range res {
+ have, suffix := run[i](re, refull, text)
+ want := parseResult(t, file, lineno, res[i])
+ if !same(have, want) {
+ t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
+ if nfail++; nfail >= 100 {
+ t.Fatalf("stopping after %d errors", nfail)
+ }
+ continue
+ }
+ b, suffix := match[i](re, refull, text)
+ if b != (want != nil) {
+ t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
+ if nfail++; nfail >= 100 {
+ t.Fatalf("stopping after %d errors", nfail)
+ }
+ continue
+ }
+ }
+
+ default:
+ t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
+ }
+ }
+ if len(input) != 0 {
+ t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
+ }
+ t.Logf("%d cases tested", ncase)
+}
+
+var run = []func(*Regexp, *Regexp, string) ([]int, string){
+ runFull,
+ runPartial,
+ runFullLongest,
+ runPartialLongest,
+}
+
+func runFull(re, refull *Regexp, text string) ([]int, string) {
+ refull.longest = false
+ return refull.FindStringSubmatchIndex(text), "[full]"
+}
+
+func runPartial(re, refull *Regexp, text string) ([]int, string) {
+ re.longest = false
+ return re.FindStringSubmatchIndex(text), ""
+}
+
+func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
+ refull.longest = true
+ return refull.FindStringSubmatchIndex(text), "[full,longest]"
+}
+
+func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
+ re.longest = true
+ return re.FindStringSubmatchIndex(text), "[longest]"
+}
+
+var match = []func(*Regexp, *Regexp, string) (bool, string){
+ matchFull,
+ matchPartial,
+ matchFullLongest,
+ matchPartialLongest,
+}
+
+func matchFull(re, refull *Regexp, text string) (bool, string) {
+ refull.longest = false
+ return refull.MatchString(text), "[full]"
+}
+
+func matchPartial(re, refull *Regexp, text string) (bool, string) {
+ re.longest = false
+ return re.MatchString(text), ""
+}
+
+func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
+ refull.longest = true
+ return refull.MatchString(text), "[full,longest]"
+}
+
+func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
+ re.longest = true
+ return re.MatchString(text), "[longest]"
+}
+
+func isSingleBytes(s string) bool {
+ for _, c := range s {
+ if c >= utf8.RuneSelf {
+ return false
+ }
+ }
+ return true
+}
+
+func tryCompile(s string) (re *Regexp, err os.Error) {
+ // Protect against panic during Compile.
+ defer func() {
+ if r := recover(); r != nil {
+ err = fmt.Errorf("panic: %v", r)
+ }
+ }()
+ return Compile(s)
+}
+
+func parseResult(t *testing.T, file string, lineno int, res string) []int {
+ // A single - indicates no match.
+ if res == "-" {
+ return nil
+ }
+ // Otherwise, a space-separated list of pairs.
+ n := 1
+ for j := 0; j < len(res); j++ {
+ if res[j] == ' ' {
+ n++
+ }
+ }
+ out := make([]int, 2*n)
+ i := 0
+ n = 0
+ for j := 0; j <= len(res); j++ {
+ if j == len(res) || res[j] == ' ' {
+ // Process a single pair. - means no submatch.
+ pair := res[i:j]
+ if pair == "-" {
+ out[n] = -1
+ out[n+1] = -1
+ } else {
+ k := strings.Index(pair, "-")
+ if k < 0 {
+ t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
+ }
+ lo, err1 := strconv.Atoi(pair[:k])
+ hi, err2 := strconv.Atoi(pair[k+1:])
+ if err1 != nil || err2 != nil || lo > hi {
+ t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
+ }
+ out[n] = lo
+ out[n+1] = hi
+ }
+ n += 2
+ i = j + 1
+ }
+ }
+ return out
+}
+
+func same(x, y []int) bool {
+ if len(x) != len(y) {
+ return false
+ }
+ for i, xi := range x {
+ if xi != y[i] {
+ return false
+ }
+ }
+ return true
+}
+
+// TestFowler runs this package's regexp API against the
+// POSIX regular expression tests collected by Glenn Fowler
+// at http://www2.research.att.com/~gsf/testregex/.
+func TestFowler(t *testing.T) {
+ files, err := filepath.Glob("testdata/*.dat")
+ if err != nil {
+ t.Fatal(err)
+ }
+ for _, file := range files {
+ t.Log(file)
+ testFowler(t, file)
+ }
+}
+
+var notab = MustCompilePOSIX(`[^\t]+`)
+
+func testFowler(t *testing.T, file string) {
+ f, err := os.Open(file)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ defer f.Close()
+ b := bufio.NewReader(f)
+ lineno := 0
+ lastRegexp := ""
+Reading:
+ for {
+ lineno++
+ line, err := b.ReadString('\n')
+ if err != nil {
+ if err != os.EOF {
+ t.Errorf("%s:%d: %v", file, lineno, err)
+ }
+ break Reading
+ }
+
+ // http://www2.research.att.com/~gsf/man/man1/testregex.html
+ //
+ // INPUT FORMAT
+ // Input lines may be blank, a comment beginning with #, or a test
+ // specification. A specification is five fields separated by one
+ // or more tabs. NULL denotes the empty string and NIL denotes the
+ // 0 pointer.
+ if line[0] == '#' || line[0] == '\n' {
+ continue Reading
+ }
+ line = line[:len(line)-1]
+ field := notab.FindAllString(line, -1)
+ for i, f := range field {
+ if f == "NULL" {
+ field[i] = ""
+ }
+ if f == "NIL" {
+ t.Logf("%s:%d: skip: %s", file, lineno, line)
+ continue Reading
+ }
+ }
+ if len(field) == 0 {
+ continue Reading
+ }
+
+ // Field 1: the regex(3) flags to apply, one character per REG_feature
+ // flag. The test is skipped if REG_feature is not supported by the
+ // implementation. If the first character is not [BEASKLP] then the
+ // specification is a global control line. One or more of [BEASKLP] may be
+ // specified; the test will be repeated for each mode.
+ //
+ // B basic BRE (grep, ed, sed)
+ // E REG_EXTENDED ERE (egrep)
+ // A REG_AUGMENTED ARE (egrep with negation)
+ // S REG_SHELL SRE (sh glob)
+ // K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
+ // L REG_LITERAL LRE (fgrep)
+ //
+ // a REG_LEFT|REG_RIGHT implicit ^...$
+ // b REG_NOTBOL lhs does not match ^
+ // c REG_COMMENT ignore space and #...\n
+ // d REG_SHELL_DOT explicit leading . match
+ // e REG_NOTEOL rhs does not match $
+ // f REG_MULTIPLE multiple \n separated patterns
+ // g FNM_LEADING_DIR testfnmatch only -- match until /
+ // h REG_MULTIREF multiple digit backref
+ // i REG_ICASE ignore case
+ // j REG_SPAN . matches \n
+ // k REG_ESCAPE \ to escape [...] delimiter
+ // l REG_LEFT implicit ^...
+ // m REG_MINIMAL minimal match
+ // n REG_NEWLINE explicit \n match
+ // o REG_ENCLOSED (|&) magic inside [@|&](...)
+ // p REG_SHELL_PATH explicit / match
+ // q REG_DELIMITED delimited pattern
+ // r REG_RIGHT implicit ...$
+ // s REG_SHELL_ESCAPED \ not special
+ // t REG_MUSTDELIM all delimiters must be specified
+ // u standard unspecified behavior -- errors not counted
+ // v REG_CLASS_ESCAPE \ special inside [...]
+ // w REG_NOSUB no subexpression match array
+ // x REG_LENIENT let some errors slide
+ // y REG_LEFT regexec() implicit ^...
+ // z REG_NULL NULL subexpressions ok
+ // $ expand C \c escapes in fields 2 and 3
+ // / field 2 is a regsubcomp() expression
+ // = field 3 is a regdecomp() expression
+ //
+ // Field 1 control lines:
+ //
+ // C set LC_COLLATE and LC_CTYPE to locale in field 2
+ //
+ // ?test ... output field 5 if passed and != EXPECTED, silent otherwise
+ // &test ... output field 5 if current and previous passed
+ // |test ... output field 5 if current passed and previous failed
+ // ; ... output field 2 if previous failed
+ // {test ... skip if failed until }
+ // } end of skip
+ //
+ // : comment comment copied as output NOTE
+ // :comment:test :comment: ignored
+ // N[OTE] comment comment copied as output NOTE
+ // T[EST] comment comment
+ //
+ // number use number for nmatch (20 by default)
+ flag := field[0]
+ switch flag[0] {
+ case '?', '&', '|', ';', '{', '}':
+ // Ignore all the control operators.
+ // Just run everything.
+ flag = flag[1:]
+ if flag == "" {
+ continue Reading
+ }
+ case ':':
+ i := strings.Index(flag[1:], ":")
+ if i < 0 {
+ t.Logf("skip: %s", line)
+ continue Reading
+ }
+ flag = flag[1+i+1:]
+ case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ t.Logf("skip: %s", line)
+ continue Reading
+ }
+
+ // Can check field count now that we've handled the myriad comment formats.
+ if len(field) < 4 {
+ t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
+ continue Reading
+ }
+
+ // Expand C escapes (a.k.a. Go escapes).
+ if strings.Contains(flag, "$") {
+ f := `"` + field[1] + `"`
+ if field[1], err = strconv.Unquote(f); err != nil {
+ t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
+ }
+ f = `"` + field[2] + `"`
+ if field[2], err = strconv.Unquote(f); err != nil {
+ t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
+ }
+ }
+
+ // Field 2: the regular expression pattern; SAME uses the pattern from
+ // the previous specification.
+ //
+ if field[1] == "SAME" {
+ field[1] = lastRegexp
+ }
+ lastRegexp = field[1]
+
+ // Field 3: the string to match.
+ text := field[2]
+
+ // Field 4: the test outcome...
+ ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
+ if !ok {
+ t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
+ continue Reading
+ }
+
+ // Field 5: optional comment appended to the report.
+
+ Testing:
+ // Run test once for each specified capital letter mode that we support.
+ for _, c := range flag {
+ pattern := field[1]
+ syn := syntax.POSIX | syntax.ClassNL
+ switch c {
+ default:
+ continue Testing
+ case 'E':
+ // extended regexp (what we support)
+ case 'L':
+ // literal
+ pattern = QuoteMeta(pattern)
+ }
+
+ for _, c := range flag {
+ switch c {
+ case 'i':
+ syn |= syntax.FoldCase
+ }
+ }
+
+ re, err := compile(pattern, syn, true)
+ if err != nil {
+ if shouldCompile {
+ t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
+ }
+ continue Testing
+ }
+ if !shouldCompile {
+ t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
+ continue Testing
+ }
+ match := re.MatchString(text)
+ if match != shouldMatch {
+ t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
+ continue Testing
+ }
+ have := re.FindStringSubmatchIndex(text)
+ if (len(have) > 0) != match {
+ t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
+ continue Testing
+ }
+ if len(have) > len(pos) {
+ have = have[:len(pos)]
+ }
+ if !same(have, pos) {
+ t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
+ }
+ }
+ }
+}
+
+func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
+ // Field 4: the test outcome. This is either one of the posix error
+ // codes (with REG_ omitted) or the match array, a list of (m,n)
+ // entries with m and n being first and last+1 positions in the
+ // field 3 string, or NULL if REG_NOSUB is in effect and success
+ // is expected. BADPAT is acceptable in place of any regcomp(3)
+ // error code. The match[] array is initialized to (-2,-2) before
+ // each test. All array elements from 0 to nmatch-1 must be specified
+ // in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
+ // Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
+ // matched (?{...}) expression, where x is the text enclosed by {...},
+ // o is the expression ordinal counting from 1, and n is the length of
+ // the unmatched portion of the subject string. If x starts with a
+ // number then that is the return value of re_execf(), otherwise 0 is
+ // returned.
+ switch {
+ case s == "":
+ // Match with no position information.
+ ok = true
+ compiled = true
+ matched = true
+ return
+ case s == "NOMATCH":
+ // Match failure.
+ ok = true
+ compiled = true
+ matched = false
+ return
+ case 'A' <= s[0] && s[0] <= 'Z':
+ // All the other error codes are compile errors.
+ ok = true
+ compiled = false
+ return
+ }
+ compiled = true
+
+ var x []int
+ for s != "" {
+ var end byte = ')'
+ if len(x)%2 == 0 {
+ if s[0] != '(' {
+ ok = false
+ return
+ }
+ s = s[1:]
+ end = ','
+ }
+ i := 0
+ for i < len(s) && s[i] != end {
+ i++
+ }
+ if i == 0 || i == len(s) {
+ ok = false
+ return
+ }
+ var v = -1
+ var err os.Error
+ if s[:i] != "?" {
+ v, err = strconv.Atoi(s[:i])
+ if err != nil {
+ ok = false
+ return
+ }
+ }
+ x = append(x, v)
+ s = s[i+1:]
+ }
+ if len(x)%2 != 0 {
+ ok = false
+ return
+ }
+ ok = true
+ matched = true
+ pos = x
+ return
+}
+
+var text []byte
+
+func makeText(n int) []byte {
+ if len(text) >= n {
+ return text[:n]
+ }
+ text = make([]byte, n)
+ for i := range text {
+ if rand.Intn(30) == 0 {
+ text[i] = '\n'
+ } else {
+ text[i] = byte(rand.Intn(0x7E+1-0x20) + 0x20)
+ }
+ }
+ return text
+}
+
+func benchmark(b *testing.B, re string, n int) {
+ r := MustCompile(re)
+ t := makeText(n)
+ b.ResetTimer()
+ b.SetBytes(int64(n))
+ for i := 0; i < b.N; i++ {
+ if r.Match(t) {
+ panic("match!")
+ }
+ }
+}
+
+func benchold(b *testing.B, re string, n int) {
+ r := old.MustCompile(re)
+ t := makeText(n)
+ b.ResetTimer()
+ b.SetBytes(int64(n))
+ for i := 0; i < b.N; i++ {
+ if r.Match(t) {
+ panic("match!")
+ }
+ }
+}
+
+const (
+ easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+ easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
+ medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+ hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+ parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
+ "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
+)
+
+func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) }
+func BenchmarkMatchEasy0_1K_Old(b *testing.B) { benchold(b, easy0, 1<<10) }
+func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) }
+func BenchmarkMatchEasy0_1M_Old(b *testing.B) { benchold(b, easy0, 1<<20) }
+func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) }
+func BenchmarkMatchEasy0_32K_Old(b *testing.B) { benchold(b, easy0, 32<<10) }
+func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) }
+func BenchmarkMatchEasy0_32M_Old(b *testing.B) { benchold(b, easy0, 32<<20) }
+func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) }
+func BenchmarkMatchEasy1_1K_Old(b *testing.B) { benchold(b, easy1, 1<<10) }
+func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) }
+func BenchmarkMatchEasy1_1M_Old(b *testing.B) { benchold(b, easy1, 1<<20) }
+func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) }
+func BenchmarkMatchEasy1_32K_Old(b *testing.B) { benchold(b, easy1, 32<<10) }
+func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) }
+func BenchmarkMatchEasy1_32M_Old(b *testing.B) { benchold(b, easy1, 32<<20) }
+func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) }
+func BenchmarkMatchMedium_1K_Old(b *testing.B) { benchold(b, medium, 1<<10) }
+func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) }
+func BenchmarkMatchMedium_1M_Old(b *testing.B) { benchold(b, medium, 1<<20) }
+func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
+func BenchmarkMatchMedium_32K_Old(b *testing.B) { benchold(b, medium, 32<<10) }
+func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
+func BenchmarkMatchMedium_32M_Old(b *testing.B) { benchold(b, medium, 32<<20) }
+func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) }
+func BenchmarkMatchHard_1K_Old(b *testing.B) { benchold(b, hard, 1<<10) }
+func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) }
+func BenchmarkMatchHard_1M_Old(b *testing.B) { benchold(b, hard, 1<<20) }
+func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) }
+func BenchmarkMatchHard_32K_Old(b *testing.B) { benchold(b, hard, 32<<10) }
+func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) }
+func BenchmarkMatchHard_32M_Old(b *testing.B) { benchold(b, hard, 32<<20) }
diff --git a/libgo/go/regexp/find_test.go b/libgo/go/regexp/find_test.go
index 83b249e3cef..e07eb7d5c05 100644
--- a/libgo/go/regexp/find_test.go
+++ b/libgo/go/regexp/find_test.go
@@ -58,8 +58,8 @@ var findTests = []FindTest{
{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
- {`\a\b\f\n\r\t\v`, "\a\b\f\n\r\t\v", build(1, 0, 7)},
- {`[\a\b\f\n\r\t\v]+`, "\a\b\f\n\r\t\v", build(1, 0, 7)},
+ {`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
+ {`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
@@ -80,6 +80,32 @@ var findTests = []FindTest{
{`data`, "daXY data", build(1, 5, 9)},
{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
{`zx+`, "zzx", build(1, 1, 3)},
+ {`ab$`, "abcab", build(1, 3, 5)},
+ {`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
+ {`(?:.|(?:.a))`, "", nil},
+ {`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
+ {`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
+ {`(a){0}`, "", build(1, 0, 0, -1, -1)},
+ {`(?-s)(?:(?:^).)`, "\n", nil},
+ {`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
+ {`(?:(?:^).)`, "\n", nil},
+ {`\b`, "x", build(2, 0, 0, 1, 1)},
+ {`\b`, "xx", build(2, 0, 0, 2, 2)},
+ {`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
+ {`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
+ {`\B`, "x", nil},
+ {`\B`, "xx", build(1, 1, 1)},
+ {`\B`, "x y", nil},
+ {`\B`, "xx yy", build(2, 1, 1, 4, 4)},
+
+ // RE2 tests
+ {`[^\S\s]`, "abcd", nil},
+ {`[^\S[:space:]]`, "abcd", nil},
+ {`[^\D\d]`, "abcd", nil},
+ {`[^\D[:digit:]]`, "abcd", nil},
+ {`(?i)\W`, "x", nil},
+ {`(?i)\W`, "k", nil},
+ {`(?i)\W`, "s", nil},
// can backslash-escape any punctuation
{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
@@ -209,7 +235,7 @@ func TestFindAll(t *testing.T) {
case test.matches == nil && result != nil:
t.Errorf("expected no match; got one: %s", test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Fatalf("expected match; got none: %s", test)
case test.matches != nil && result != nil:
if len(test.matches) != len(result) {
t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index e8d4c087cf8..2325f6204b1 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -3,27 +3,12 @@
// Package regexp implements a simple regular expression library.
//
-// The syntax of the regular expressions accepted is:
+// The syntax of the regular expressions accepted is the same
+// general syntax used by Perl, Python, and other languages.
+// More precisely, it is the syntax accepted by RE2 and described at
+// http://code.google.com/p/re2/wiki/Syntax, except for \C.
//
-// regexp:
-// concatenation { '|' concatenation }
-// concatenation:
-// { closure }
-// closure:
-// term [ '*' | '+' | '?' ]
-// term:
-// '^'
-// '$'
-// '.'
-// character
-// '[' [ '^' ] { character-range } ']'
-// '(' regexp ')'
-// character-range:
-// character [ '-' character ]
-//
-// All characters are UTF-8-encoded code points. Backslashes escape special
-// characters, including inside character classes. The standard Go character
-// escapes are also recognized: \a \b \f \n \r \t \v.
+// All characters are UTF-8-encoded code points.
//
// There are 16 methods of Regexp that match a regular expression and identify
// the matched text. Their names are matched by this regular expression:
@@ -72,7 +57,10 @@ import (
"bytes"
"io"
"os"
+ "regexp/syntax"
+ "strconv"
"strings"
+ "sync"
"utf8"
)
@@ -85,528 +73,24 @@ func (e Error) String() string {
return string(e)
}
-// Error codes returned by failures to parse an expression.
-var (
- ErrInternal = Error("regexp: internal error")
- ErrUnmatchedLpar = Error("regexp: unmatched '('")
- ErrUnmatchedRpar = Error("regexp: unmatched ')'")
- ErrUnmatchedLbkt = Error("regexp: unmatched '['")
- ErrUnmatchedRbkt = Error("regexp: unmatched ']'")
- ErrBadRange = Error("regexp: bad range in character class")
- ErrExtraneousBackslash = Error("regexp: extraneous backslash")
- ErrBadClosure = Error("regexp: repeated closure (**, ++, etc.)")
- ErrBareClosure = Error("regexp: closure applies to nothing")
- ErrBadBackslash = Error("regexp: illegal backslash escape")
-)
-
-const (
- iStart = iota // beginning of program
- iEnd // end of program: success
- iBOT // '^' beginning of text
- iEOT // '$' end of text
- iChar // 'a' regular character
- iCharClass // [a-z] character class
- iAny // '.' any character including newline
- iNotNL // [^\n] special case: any character but newline
- iBra // '(' parenthesized expression: 2*braNum for left, 2*braNum+1 for right
- iAlt // '|' alternation
- iNop // do nothing; makes it easy to link without patching
-)
-
-// An instruction executed by the NFA
-type instr struct {
- kind int // the type of this instruction: iChar, iAny, etc.
- index int // used only in debugging; could be eliminated
- next *instr // the instruction to execute after this one
- // Special fields valid only for some items.
- char int // iChar
- braNum int // iBra, iEbra
- cclass *charClass // iCharClass
- left *instr // iAlt, other branch
-}
-
-func (i *instr) print() {
- switch i.kind {
- case iStart:
- print("start")
- case iEnd:
- print("end")
- case iBOT:
- print("bot")
- case iEOT:
- print("eot")
- case iChar:
- print("char ", string(i.char))
- case iCharClass:
- i.cclass.print()
- case iAny:
- print("any")
- case iNotNL:
- print("notnl")
- case iBra:
- if i.braNum&1 == 0 {
- print("bra", i.braNum/2)
- } else {
- print("ebra", i.braNum/2)
- }
- case iAlt:
- print("alt(", i.left.index, ")")
- case iNop:
- print("nop")
- }
-}
-
// Regexp is the representation of a compiled regular expression.
// The public interface is entirely through methods.
// A Regexp is safe for concurrent use by multiple goroutines.
type Regexp struct {
- expr string // the original expression
- prefix string // initial plain text string
- prefixBytes []byte // initial plain text bytes
- inst []*instr
- start *instr // first instruction of machine
- prefixStart *instr // where to start if there is a prefix
- nbra int // number of brackets in expression, for subexpressions
-}
-
-type charClass struct {
- negate bool // is character class negated? ([^a-z])
- // slice of int, stored pairwise: [a-z] is (a,z); x is (x,x):
- ranges []int
- cmin, cmax int
-}
-
-func (cclass *charClass) print() {
- print("charclass")
- if cclass.negate {
- print(" (negated)")
- }
- for i := 0; i < len(cclass.ranges); i += 2 {
- l := cclass.ranges[i]
- r := cclass.ranges[i+1]
- if l == r {
- print(" [", string(l), "]")
- } else {
- print(" [", string(l), "-", string(r), "]")
- }
- }
-}
-
-func (cclass *charClass) addRange(a, b int) {
- // range is a through b inclusive
- cclass.ranges = append(cclass.ranges, a, b)
- if a < cclass.cmin {
- cclass.cmin = a
- }
- if b > cclass.cmax {
- cclass.cmax = b
- }
-}
-
-func (cclass *charClass) matches(c int) bool {
- if c < cclass.cmin || c > cclass.cmax {
- return cclass.negate
- }
- ranges := cclass.ranges
- for i := 0; i < len(ranges); i = i + 2 {
- if ranges[i] <= c && c <= ranges[i+1] {
- return !cclass.negate
- }
- }
- return cclass.negate
-}
-
-func newCharClass() *instr {
- i := &instr{kind: iCharClass}
- i.cclass = new(charClass)
- i.cclass.ranges = make([]int, 0, 4)
- i.cclass.cmin = 0x10FFFF + 1 // MaxRune + 1
- i.cclass.cmax = -1
- return i
-}
-
-func (re *Regexp) add(i *instr) *instr {
- i.index = len(re.inst)
- re.inst = append(re.inst, i)
- return i
-}
-
-type parser struct {
- re *Regexp
- nlpar int // number of unclosed lpars
- pos int
- ch int
-}
-
-func (p *parser) error(err Error) {
- panic(err)
-}
-
-const endOfText = -1
-
-func (p *parser) c() int { return p.ch }
-
-func (p *parser) nextc() int {
- if p.pos >= len(p.re.expr) {
- p.ch = endOfText
- } else {
- c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:])
- p.ch = c
- p.pos += w
- }
- return p.ch
-}
-
-func newParser(re *Regexp) *parser {
- p := new(parser)
- p.re = re
- p.nextc() // load p.ch
- return p
-}
-
-func special(c int) bool {
- for _, r := range `\.+*?()|[]^$` {
- if c == r {
- return true
- }
- }
- return false
-}
-
-func ispunct(c int) bool {
- for _, r := range "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" {
- if c == r {
- return true
- }
- }
- return false
-}
-
-var escapes = []byte("abfnrtv")
-var escaped = []byte("\a\b\f\n\r\t\v")
-
-func escape(c int) int {
- for i, b := range escapes {
- if int(b) == c {
- return i
- }
- }
- return -1
-}
-
-func (p *parser) checkBackslash() int {
- c := p.c()
- if c == '\\' {
- c = p.nextc()
- switch {
- case c == endOfText:
- p.error(ErrExtraneousBackslash)
- case ispunct(c):
- // c is as delivered
- case escape(c) >= 0:
- c = int(escaped[escape(c)])
- default:
- p.error(ErrBadBackslash)
- }
- }
- return c
-}
-
-func (p *parser) charClass() *instr {
- i := newCharClass()
- cc := i.cclass
- if p.c() == '^' {
- cc.negate = true
- p.nextc()
- }
- left := -1
- for {
- switch c := p.c(); c {
- case ']', endOfText:
- if left >= 0 {
- p.error(ErrBadRange)
- }
- // Is it [^\n]?
- if cc.negate && len(cc.ranges) == 2 &&
- cc.ranges[0] == '\n' && cc.ranges[1] == '\n' {
- nl := &instr{kind: iNotNL}
- p.re.add(nl)
- return nl
- }
- // Special common case: "[a]" -> "a"
- if !cc.negate && len(cc.ranges) == 2 && cc.ranges[0] == cc.ranges[1] {
- c := &instr{kind: iChar, char: cc.ranges[0]}
- p.re.add(c)
- return c
- }
- p.re.add(i)
- return i
- case '-': // do this before backslash processing
- p.error(ErrBadRange)
- default:
- c = p.checkBackslash()
- p.nextc()
- switch {
- case left < 0: // first of pair
- if p.c() == '-' { // range
- p.nextc()
- left = c
- } else { // single char
- cc.addRange(c, c)
- }
- case left <= c: // second of pair
- cc.addRange(left, c)
- left = -1
- default:
- p.error(ErrBadRange)
- }
- }
- }
- panic("unreachable")
-}
-
-func (p *parser) term() (start, end *instr) {
- switch c := p.c(); c {
- case '|', endOfText:
- return nil, nil
- case '*', '+', '?':
- p.error(ErrBareClosure)
- case ')':
- if p.nlpar == 0 {
- p.error(ErrUnmatchedRpar)
- }
- return nil, nil
- case ']':
- p.error(ErrUnmatchedRbkt)
- case '^':
- p.nextc()
- start = p.re.add(&instr{kind: iBOT})
- return start, start
- case '$':
- p.nextc()
- start = p.re.add(&instr{kind: iEOT})
- return start, start
- case '.':
- p.nextc()
- start = p.re.add(&instr{kind: iAny})
- return start, start
- case '[':
- p.nextc()
- start = p.charClass()
- if p.c() != ']' {
- p.error(ErrUnmatchedLbkt)
- }
- p.nextc()
- return start, start
- case '(':
- p.nextc()
- p.nlpar++
- p.re.nbra++ // increment first so first subexpr is \1
- nbra := p.re.nbra
- start, end = p.regexp()
- if p.c() != ')' {
- p.error(ErrUnmatchedLpar)
- }
- p.nlpar--
- p.nextc()
- bra := &instr{kind: iBra, braNum: 2 * nbra}
- p.re.add(bra)
- ebra := &instr{kind: iBra, braNum: 2*nbra + 1}
- p.re.add(ebra)
- if start == nil {
- if end == nil {
- p.error(ErrInternal)
- return
- }
- start = ebra
- } else {
- end.next = ebra
- }
- bra.next = start
- return bra, ebra
- default:
- c = p.checkBackslash()
- p.nextc()
- start = &instr{kind: iChar, char: c}
- p.re.add(start)
- return start, start
- }
- panic("unreachable")
-}
-
-func (p *parser) closure() (start, end *instr) {
- start, end = p.term()
- if start == nil {
- return
- }
- switch p.c() {
- case '*':
- // (start,end)*:
- alt := &instr{kind: iAlt}
- p.re.add(alt)
- end.next = alt // after end, do alt
- alt.left = start // alternate brach: return to start
- start = alt // alt becomes new (start, end)
- end = alt
- case '+':
- // (start,end)+:
- alt := &instr{kind: iAlt}
- p.re.add(alt)
- end.next = alt // after end, do alt
- alt.left = start // alternate brach: return to start
- end = alt // start is unchanged; end is alt
- case '?':
- // (start,end)?:
- alt := &instr{kind: iAlt}
- p.re.add(alt)
- nop := &instr{kind: iNop}
- p.re.add(nop)
- alt.left = start // alternate branch is start
- alt.next = nop // follow on to nop
- end.next = nop // after end, go to nop
- start = alt // start is now alt
- end = nop // end is nop pointed to by both branches
- default:
- return
- }
- switch p.nextc() {
- case '*', '+', '?':
- p.error(ErrBadClosure)
- }
- return
-}
-
-func (p *parser) concatenation() (start, end *instr) {
- for {
- nstart, nend := p.closure()
- switch {
- case nstart == nil: // end of this concatenation
- if start == nil { // this is the empty string
- nop := p.re.add(&instr{kind: iNop})
- return nop, nop
- }
- return
- case start == nil: // this is first element of concatenation
- start, end = nstart, nend
- default:
- end.next = nstart
- end = nend
- }
- }
- panic("unreachable")
-}
-
-func (p *parser) regexp() (start, end *instr) {
- start, end = p.concatenation()
- for {
- switch p.c() {
- default:
- return
- case '|':
- p.nextc()
- nstart, nend := p.concatenation()
- alt := &instr{kind: iAlt}
- p.re.add(alt)
- alt.left = start
- alt.next = nstart
- nop := &instr{kind: iNop}
- p.re.add(nop)
- end.next = nop
- nend.next = nop
- start, end = alt, nop
- }
- }
- panic("unreachable")
-}
-
-func unNop(i *instr) *instr {
- for i.kind == iNop {
- i = i.next
- }
- return i
-}
-
-func (re *Regexp) eliminateNops() {
- for _, inst := range re.inst {
- if inst.kind == iEnd {
- continue
- }
- inst.next = unNop(inst.next)
- if inst.kind == iAlt {
- inst.left = unNop(inst.left)
- }
- }
-}
-
-func (re *Regexp) dump() {
- print("prefix <", re.prefix, ">\n")
- for _, inst := range re.inst {
- print(inst.index, ": ")
- inst.print()
- if inst.kind != iEnd {
- print(" -> ", inst.next.index)
- }
- print("\n")
- }
-}
-
-func (re *Regexp) doParse() {
- p := newParser(re)
- start := &instr{kind: iStart}
- re.add(start)
- s, e := p.regexp()
- start.next = s
- re.start = start
- e.next = re.add(&instr{kind: iEnd})
-
- if debug {
- re.dump()
- println()
- }
-
- re.eliminateNops()
- if debug {
- re.dump()
- println()
- }
- re.setPrefix()
- if debug {
- re.dump()
- println()
- }
-}
-
-// Extract regular text from the beginning of the pattern,
-// possibly after a leading iBOT.
-// That text can be used by doExecute to speed up matching.
-func (re *Regexp) setPrefix() {
- var b []byte
- var utf = make([]byte, utf8.UTFMax)
- var inst *instr
- // First instruction is start; skip that. Also skip any initial iBOT.
- inst = re.inst[0].next
- for inst.kind == iBOT {
- inst = inst.next
- }
-Loop:
- for ; inst.kind != iEnd; inst = inst.next {
- // stop if this is not a char
- if inst.kind != iChar {
- break
- }
- // stop if this char can be followed by a match for an empty string,
- // which includes closures, ^, and $.
- switch inst.next.kind {
- case iBOT, iEOT, iAlt:
- break Loop
- }
- n := utf8.EncodeRune(utf, inst.char)
- b = append(b, utf[0:n]...)
- }
- // point prefixStart instruction to first non-CHAR after prefix
- re.prefixStart = inst
- re.prefixBytes = b
- re.prefix = string(b)
+ // read-only after Compile
+ expr string // as passed to Compile
+ prog *syntax.Prog // compiled program
+ prefix string // required prefix in unanchored matches
+ prefixBytes []byte // prefix, as a []byte
+ prefixComplete bool // prefix is the entire regexp
+ prefixRune int // first rune in prefix
+ cond syntax.EmptyOp // empty-width conditions required at start of match
+ numSubexp int
+ longest bool
+
+ // cache of machines for running regexp
+ mu sync.Mutex
+ machine []*machine
}
// String returns the source text used to compile the regular expression.
@@ -614,21 +98,96 @@ func (re *Regexp) String() string {
return re.expr
}
-// Compile parses a regular expression and returns, if successful, a Regexp
-// object that can be used to match against text.
-func Compile(str string) (regexp *Regexp, error os.Error) {
- regexp = new(Regexp)
- // doParse will panic if there is a parse error.
- defer func() {
- if e := recover(); e != nil {
- regexp = nil
- error = e.(Error) // Will re-panic if error was not an Error, e.g. nil-pointer exception
- }
- }()
- regexp.expr = str
- regexp.inst = make([]*instr, 0, 10)
- regexp.doParse()
- return
+// Compile parses a regular expression and returns, if successful,
+// a Regexp object that can be used to match against text.
+//
+// When matching against text, the regexp returns a match that
+// begins as early as possible in the input (leftmost), and among those
+// it chooses the one that a backtracking search would have found first.
+// This so-called leftmost-first matching is the same semantics
+// that Perl, Python, and other implementations use, although this
+// package implements it without the expense of backtracking.
+// For POSIX leftmost-longest matching, see CompilePOSIX.
+func Compile(expr string) (*Regexp, os.Error) {
+ return compile(expr, syntax.Perl, false) // Perl syntax flags; longest=false keeps leftmost-first matching
+}
+
+// CompilePOSIX is like Compile but restricts the regular expression
+// to POSIX ERE (egrep) syntax and changes the match semantics to
+// leftmost-longest.
+//
+// That is, when matching against text, the regexp returns a match that
+// begins as early as possible in the input (leftmost), and among those
+// it chooses a match that is as long as possible.
+// This so-called leftmost-longest matching is the same semantics
+// that early regular expression implementations used and that POSIX
+// specifies.
+//
+// However, there can be multiple leftmost-longest matches, with different
+// submatch choices, and here this package diverges from POSIX.
+// Among the possible leftmost-longest matches, this package chooses
+// the one that a backtracking search would have found first, while POSIX
+// specifies that the match be chosen to maximize the length of the first
+// subexpression, then the second, and so on from left to right.
+// The POSIX rule is computationally prohibitive and not even well-defined.
+// See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
+func CompilePOSIX(expr string) (*Regexp, os.Error) {
+ return compile(expr, syntax.POSIX, true) // POSIX syntax flags; longest=true selects leftmost-longest matching
+}
+
+func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, os.Error) { // shared implementation behind Compile and CompilePOSIX
+ re, err := syntax.Parse(expr, mode) // mode selects the accepted syntax (syntax.Perl or syntax.POSIX in the callers)
+ if err != nil {
+ return nil, err
+ }
+ maxCap := re.MaxCap() // taken before Simplify/Compile: the program may keep fewer captures, e.g. for "(a){0}"
+ re = re.Simplify()
+ prog, err := syntax.Compile(re)
+ if err != nil {
+ return nil, err
+ }
+ regexp := &Regexp{ // note: this local shadows the package name inside compile
+ expr: expr,
+ prog: prog,
+ numSubexp: maxCap,
+ cond: prog.StartCond(), // empty-width conditions required at the start of a match
+ longest: longest,
+ }
+ regexp.prefix, regexp.prefixComplete = prog.Prefix() // literal prefix, and whether it is the entire regexp
+ if regexp.prefix != "" {
+ // TODO(rsc): Remove this allocation by adding
+ // IndexString to package bytes.
+ regexp.prefixBytes = []byte(regexp.prefix) // byte form used by inputBytes.index
+ regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix) // first rune of the prefix; its width is not needed
+ }
+ return regexp, nil
+}
+
+// get returns a machine to use for matching re.
+// It uses the re's machine cache if possible, to avoid
+// unnecessary allocation.
+func (re *Regexp) get() *machine {
+ re.mu.Lock() // every access to re.machine in get and put happens under re.mu
+ if n := len(re.machine); n > 0 {
+ z := re.machine[n-1] // pop the most recently cached machine
+ re.machine = re.machine[:n-1]
+ re.mu.Unlock()
+ return z
+ }
+ re.mu.Unlock() // cache is empty: release the lock before building a new machine
+ z := progMachine(re.prog)
+ z.re = re // tie the new machine to this Regexp
+ return z
+}
+
+// put returns a machine to the re's machine cache.
+// There is no attempt to limit the size of the cache, so it will
+// grow to the maximum number of simultaneous matches
+// run using re. (The cache empties when re gets garbage collected.)
+func (re *Regexp) put(z *machine) {
+ re.mu.Lock()
+ re.machine = append(re.machine, z) // push at the end; get pops from the same end
+ re.mu.Unlock()
}
// MustCompile is like Compile but panics if the expression cannot be parsed.
@@ -637,116 +196,35 @@ func Compile(str string) (regexp *Regexp, error os.Error) {
func MustCompile(str string) *Regexp {
regexp, error := Compile(str)
if error != nil {
- panic(`regexp: compiling "` + str + `": ` + error.String())
+ panic(`regexp: Compile(` + quote(str) + `): ` + error.String())
}
return regexp
}
-// NumSubexp returns the number of parenthesized subexpressions in this Regexp.
-func (re *Regexp) NumSubexp() int { return re.nbra }
-
-// The match arena allows us to reduce the garbage generated by tossing
-// match vectors away as we execute. Matches are ref counted and returned
-// to a free list when no longer active. Increases a simple benchmark by 22X.
-type matchArena struct {
- head *matchVec
- len int // length of match vector
- pos int
- atBOT bool // whether we're at beginning of text
- atEOT bool // whether we're at end of text
-}
-
-type matchVec struct {
- m []int // pairs of bracketing submatches. 0th is start,end
- ref int
- next *matchVec
-}
-
-func (a *matchArena) new() *matchVec {
- if a.head == nil {
- const N = 10
- block := make([]matchVec, N)
- for i := 0; i < N; i++ {
- b := &block[i]
- b.next = a.head
- a.head = b
- }
- }
- m := a.head
- a.head = m.next
- m.ref = 0
- if m.m == nil {
- m.m = make([]int, a.len)
- }
- return m
-}
-
-func (a *matchArena) free(m *matchVec) {
- m.ref--
- if m.ref == 0 {
- m.next = a.head
- a.head = m
+// MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed.
+// It simplifies safe initialization of global variables holding compiled regular
+// expressions.
+func MustCompilePOSIX(str string) *Regexp {
+ regexp, error := CompilePOSIX(str)
+ if error != nil {
+ panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + error.String()) // quote renders str as a Go string literal in the message
}
+ return regexp
}
-func (a *matchArena) copy(m *matchVec) *matchVec {
- m1 := a.new()
- copy(m1.m, m.m)
- return m1
-}
-
-func (a *matchArena) noMatch() *matchVec {
- m := a.new()
- for i := range m.m {
- m.m[i] = -1 // no match seen; catches cases like "a(b)?c" on "ac"
+func quote(s string) string { // quote formats s as a Go string literal for the MustCompile* panic messages
+ if strconv.CanBackquote(s) { // prefer a raw, backquoted literal when s can be written that way
+ return "`" + s + "`"
}
- m.ref = 1
- return m
+ return strconv.Quote(s) // otherwise fall back to an escaped, double-quoted literal
}
-type state struct {
- inst *instr // next instruction to execute
- prefixed bool // this match began with a fixed prefix
- match *matchVec
+// NumSubexp returns the number of parenthesized subexpressions in this Regexp.
+func (re *Regexp) NumSubexp() int {
+ return re.numSubexp // set by compile from the parsed expression's MaxCap()
+}
-// Append new state to to-do list. Leftmost-longest wins so avoid
-// adding a state that's already active. The matchVec will be inc-ref'ed
-// if it is assigned to a state.
-func (a *matchArena) addState(s []state, inst *instr, prefixed bool, match *matchVec) []state {
- switch inst.kind {
- case iBOT:
- if a.atBOT {
- s = a.addState(s, inst.next, prefixed, match)
- }
- return s
- case iEOT:
- if a.atEOT {
- s = a.addState(s, inst.next, prefixed, match)
- }
- return s
- case iBra:
- match.m[inst.braNum] = a.pos
- s = a.addState(s, inst.next, prefixed, match)
- return s
- }
- l := len(s)
- // States are inserted in order so it's sufficient to see if we have the same
- // instruction; no need to see if existing match is earlier (it is).
- for i := 0; i < l; i++ {
- if s[i].inst == inst {
- return s
- }
- }
- s = append(s, state{inst, prefixed, match})
- match.ref++
- if inst.kind == iAlt {
- s = a.addState(s, inst.left, prefixed, a.copy(match))
- // give other branch a copy of this match vector
- s = a.addState(s, inst.next, prefixed, a.copy(match))
- }
- return s
-}
+const endOfText = -1
// input abstracts different representations of the input text. It provides
// one-character lookahead.
@@ -755,6 +233,7 @@ type input interface {
canCheckPrefix() bool // can we look ahead without losing info?
hasPrefix(re *Regexp) bool
index(re *Regexp, pos int) int
+ context(pos int) syntax.EmptyOp
}
// inputString scans a string.
@@ -768,7 +247,11 @@ func newInputString(str string) *inputString {
func (i *inputString) step(pos int) (int, int) {
if pos < len(i.str) {
- return utf8.DecodeRuneInString(i.str[pos:len(i.str)])
+ c := i.str[pos]
+ if c < utf8.RuneSelf { // fast path: a byte below RuneSelf is a complete one-byte rune
+ return int(c), 1 // rune value and its width in bytes
+ }
+ return utf8.DecodeRuneInString(i.str[pos:]) // multi-byte (or invalid) sequence: full decode
}
return endOfText, 0 // pos is at or past the end of the string
}
@@ -785,6 +268,17 @@ func (i *inputString) index(re *Regexp, pos int) int {
return strings.Index(i.str[pos:], re.prefix)
}
+func (i *inputString) context(pos int) syntax.EmptyOp { // context reports the empty-width conditions at byte offset pos
+ r1, r2 := -1, -1 // -1 stands for "no rune on this side" (start or end of text)
+ if pos > 0 && pos <= len(i.str) {
+ r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) // rune ending immediately before pos
+ }
+ if pos < len(i.str) {
+ r2, _ = utf8.DecodeRuneInString(i.str[pos:]) // rune starting at pos
+ }
+ return syntax.EmptyOpContext(r1, r2) // conditions holding between r1 and r2
+}
+
// inputBytes scans a byte slice.
type inputBytes struct {
str []byte
@@ -796,7 +290,11 @@ func newInputBytes(str []byte) *inputBytes {
func (i *inputBytes) step(pos int) (int, int) {
if pos < len(i.str) {
- return utf8.DecodeRune(i.str[pos:len(i.str)])
+ c := i.str[pos]
+ if c < utf8.RuneSelf { // fast path: a byte below RuneSelf is a complete one-byte rune
+ return int(c), 1 // rune value and its width in bytes
+ }
+ return utf8.DecodeRune(i.str[pos:]) // multi-byte (or invalid) sequence: full decode
}
return endOfText, 0 // pos is at or past the end of the slice
}
@@ -813,6 +311,17 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
return bytes.Index(i.str[pos:], re.prefixBytes)
}
+func (i *inputBytes) context(pos int) syntax.EmptyOp { // context reports the empty-width conditions at byte offset pos
+ r1, r2 := -1, -1 // -1 stands for "no rune on this side" (start or end of text)
+ if pos > 0 && pos <= len(i.str) {
+ r1, _ = utf8.DecodeLastRune(i.str[:pos]) // rune ending immediately before pos
+ }
+ if pos < len(i.str) {
+ r2, _ = utf8.DecodeRune(i.str[pos:]) // rune starting at pos
+ }
+ return syntax.EmptyOpContext(r1, r2) // conditions holding between r1 and r2
+}
+
// inputReader scans a RuneReader.
type inputReader struct {
r io.RuneReader
@@ -850,150 +359,35 @@ func (i *inputReader) index(re *Regexp, pos int) int {
return -1
}
-// Search match starting from pos bytes into the input.
-func (re *Regexp) doExecute(i input, pos int) []int {
- var s [2][]state
- s[0] = make([]state, 0, 10)
- s[1] = make([]state, 0, 10)
- in, out := 0, 1
- var final state
- found := false
- anchored := re.inst[0].next.kind == iBOT
- if anchored && pos > 0 {
- return nil
- }
- // fast check for initial plain substring
- if i.canCheckPrefix() && re.prefix != "" {
- advance := 0
- if anchored {
- if !i.hasPrefix(re) {
- return nil
- }
- } else {
- advance = i.index(re, pos)
- if advance == -1 {
- return nil
- }
- }
- pos += advance
- }
- // We look one character ahead so we can match $, which checks whether
- // we are at EOT.
- nextChar, nextWidth := i.step(pos)
- arena := &matchArena{
- len: 2 * (re.nbra + 1),
- pos: pos,
- atBOT: pos == 0,
- atEOT: nextChar == endOfText,
- }
- for c, startPos := 0, pos; c != endOfText; {
- if !found && (pos == startPos || !anchored) {
- // prime the pump if we haven't seen a match yet
- match := arena.noMatch()
- match.m[0] = pos
- s[out] = arena.addState(s[out], re.start.next, false, match)
- arena.free(match) // if addState saved it, ref was incremented
- } else if len(s[out]) == 0 {
- // machine has completed
- break
- }
- in, out = out, in // old out state is new in state
- // clear out old state
- old := s[out]
- for _, state := range old {
- arena.free(state.match)
- }
- s[out] = old[0:0] // truncate state vector
- c = nextChar
- thisPos := pos
- pos += nextWidth
- nextChar, nextWidth = i.step(pos)
- arena.atEOT = nextChar == endOfText
- arena.atBOT = false
- arena.pos = pos
- for _, st := range s[in] {
- switch st.inst.kind {
- case iBOT:
- case iEOT:
- case iChar:
- if c == st.inst.char {
- s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
- }
- case iCharClass:
- if st.inst.cclass.matches(c) {
- s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
- }
- case iAny:
- if c != endOfText {
- s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
- }
- case iNotNL:
- if c != endOfText && c != '\n' {
- s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
- }
- case iBra:
- case iAlt:
- case iEnd:
- // choose leftmost longest
- if !found || // first
- st.match.m[0] < final.match.m[0] || // leftmost
- (st.match.m[0] == final.match.m[0] && thisPos > final.match.m[1]) { // longest
- if final.match != nil {
- arena.free(final.match)
- }
- final = st
- final.match.ref++
- final.match.m[1] = thisPos
- }
- found = true
- default:
- st.inst.print()
- panic("unknown instruction in execute")
- }
- }
- }
- if final.match == nil {
- return nil
- }
- // if match found, back up start of match by width of prefix.
- if final.prefixed && len(final.match.m) > 0 {
- final.match.m[0] -= len(re.prefix)
- }
- return final.match.m
+func (i *inputReader) context(pos int) syntax.EmptyOp { // pos is ignored for reader input
+ return 0 // always the zero EmptyOp: no surrounding runes are examined here
}
// LiteralPrefix returns a literal string that must begin any match
// of the regular expression re. It returns the boolean true if the
// literal string comprises the entire regular expression.
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
- c := make([]int, len(re.inst)-2) // minus start and end.
- // First instruction is start; skip that.
- i := 0
- for inst := re.inst[0].next; inst.kind != iEnd; inst = inst.next {
- // stop if this is not a char
- if inst.kind != iChar {
- return string(c[:i]), false
- }
- c[i] = inst.char
- i++
- }
- return string(c[:i]), true
+ return re.prefix, re.prefixComplete
}
// MatchReader returns whether the Regexp matches the text read by the
// RuneReader. The return value is a boolean: true for match, false for no
// match.
func (re *Regexp) MatchReader(r io.RuneReader) bool {
- return len(re.doExecute(newInputReader(r), 0)) > 0
+ return re.doExecute(newInputReader(r), 0, 0) != nil
}
// MatchString returns whether the Regexp matches the string s.
// The return value is a boolean: true for match, false for no match.
-func (re *Regexp) MatchString(s string) bool { return len(re.doExecute(newInputString(s), 0)) > 0 }
+func (re *Regexp) MatchString(s string) bool {
+ return re.doExecute(newInputString(s), 0, 0) != nil
+}
// Match returns whether the Regexp matches the byte slice b.
// The return value is a boolean: true for match, false for no match.
-func (re *Regexp) Match(b []byte) bool { return len(re.doExecute(newInputBytes(b), 0)) > 0 }
+func (re *Regexp) Match(b []byte) bool {
+ return re.doExecute(newInputBytes(b), 0, 0) != nil
+}
// MatchReader checks whether a textual regular expression matches the text
// read by the RuneReader. More complicated queries need to use Compile and
@@ -1044,7 +438,7 @@ func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) str
searchPos := 0 // position where we next look for a match
buf := new(bytes.Buffer)
for searchPos <= len(src) {
- a := re.doExecute(newInputString(src), searchPos)
+ a := re.doExecute(newInputString(src), searchPos, 2)
if len(a) == 0 {
break // no more matches
}
@@ -1096,7 +490,7 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
searchPos := 0 // position where we next look for a match
buf := new(bytes.Buffer)
for searchPos <= len(src) {
- a := re.doExecute(newInputBytes(src), searchPos)
+ a := re.doExecute(newInputBytes(src), searchPos, 2)
if len(a) == 0 {
break // no more matches
}
@@ -1132,6 +526,12 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
return buf.Bytes()
}
+var specialBytes = []byte(`\.+*?()|[]{}^$`) // metacharacters that QuoteMeta backslash-escapes
+
+func special(b byte) bool { // special reports whether b is a regexp metacharacter
+ return bytes.IndexByte(specialBytes, b) >= 0 // true when b occurs in specialBytes
+}
+
// QuoteMeta returns a string that quotes all regular expression metacharacters
// inside the argument text; the returned string is a regular expression matching
// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
@@ -1141,7 +541,7 @@ func QuoteMeta(s string) string {
// A byte loop is correct because all metacharacters are ASCII.
j := 0
for i := 0; i < len(s); i++ {
- if special(int(s[i])) {
+ if special(s[i]) {
b[j] = '\\'
j++
}
@@ -1151,6 +551,23 @@ func QuoteMeta(s string) string {
return string(b[0:j])
}
+// The number of capture values in the program may correspond
+// to fewer capturing expressions than are in the regexp.
+// For example, "(a){0}" turns into an empty program, so the
+// maximum capture in the program is 0 but we need to return
+// an expression for \1. Pad appends -1s to the slice a as needed.
+func (re *Regexp) pad(a []int) []int {
+ if a == nil {
+ // No match.
+ return nil
+ }
+ n := (1 + re.numSubexp) * 2 // a start/end index pair for the whole match plus one pair per subexpression
+ for len(a) < n {
+ a = append(a, -1) // -1 fills positions the program did not report
+ }
+ return a
+}
+
// Find matches in slice b if b is non-nil, otherwise find matches in string s.
func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
var end int
@@ -1167,7 +584,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
} else {
in = newInputBytes(b)
}
- matches := re.doExecute(in, pos)
+ matches := re.doExecute(in, pos, re.prog.NumCap)
if len(matches) == 0 {
break
}
@@ -1198,7 +615,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
prevMatchEnd = matches[1]
if accept {
- deliver(matches)
+ deliver(re.pad(matches))
i++
}
}
@@ -1207,7 +624,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
// A return value of nil indicates no match.
func (re *Regexp) Find(b []byte) []byte {
- a := re.doExecute(newInputBytes(b), 0)
+ a := re.doExecute(newInputBytes(b), 0, 2)
if a == nil {
return nil
}
@@ -1219,7 +636,7 @@ func (re *Regexp) Find(b []byte) []byte {
// b[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindIndex(b []byte) (loc []int) {
- a := re.doExecute(newInputBytes(b), 0)
+ a := re.doExecute(newInputBytes(b), 0, 2)
if a == nil {
return nil
}
@@ -1232,7 +649,7 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) {
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
// necessary to distinguish these cases.
func (re *Regexp) FindString(s string) string {
- a := re.doExecute(newInputString(s), 0)
+ a := re.doExecute(newInputString(s), 0, 2)
if a == nil {
return ""
}
@@ -1244,7 +661,7 @@ func (re *Regexp) FindString(s string) string {
// itself is at s[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindStringIndex(s string) []int {
- a := re.doExecute(newInputString(s), 0)
+ a := re.doExecute(newInputString(s), 0, 2)
if a == nil {
return nil
}
@@ -1256,7 +673,7 @@ func (re *Regexp) FindStringIndex(s string) []int {
// the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return
// value of nil indicates no match.
func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
- a := re.doExecute(newInputReader(r), 0)
+ a := re.doExecute(newInputReader(r), 0, 2)
if a == nil {
return nil
}
@@ -1269,13 +686,13 @@ func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
// comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
- a := re.doExecute(newInputBytes(b), 0)
+ a := re.doExecute(newInputBytes(b), 0, re.prog.NumCap)
if a == nil {
return nil
}
- ret := make([][]byte, len(a)/2)
+ ret := make([][]byte, 1+re.numSubexp)
for i := range ret {
- if a[2*i] >= 0 {
+ if 2*i < len(a) && a[2*i] >= 0 {
ret[i] = b[a[2*i]:a[2*i+1]]
}
}
@@ -1288,7 +705,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
// in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
- return re.doExecute(newInputBytes(b), 0)
+ return re.pad(re.doExecute(newInputBytes(b), 0, re.prog.NumCap))
}
// FindStringSubmatch returns a slice of strings holding the text of the
@@ -1297,13 +714,13 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int {
// package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatch(s string) []string {
- a := re.doExecute(newInputString(s), 0)
+ a := re.doExecute(newInputString(s), 0, re.prog.NumCap)
if a == nil {
return nil
}
- ret := make([]string, len(a)/2)
+ ret := make([]string, 1+re.numSubexp)
for i := range ret {
- if a[2*i] >= 0 {
+ if 2*i < len(a) && a[2*i] >= 0 {
ret[i] = s[a[2*i]:a[2*i+1]]
}
}
@@ -1316,7 +733,7 @@ func (re *Regexp) FindStringSubmatch(s string) []string {
// 'Index' descriptions in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
- return re.doExecute(newInputString(s), 0)
+ return re.pad(re.doExecute(newInputString(s), 0, re.prog.NumCap))
}
// FindReaderSubmatchIndex returns a slice holding the index pairs
@@ -1325,7 +742,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int {
// by the 'Submatch' and 'Index' descriptions in the package comment. A
// return value of nil indicates no match.
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
- return re.doExecute(newInputReader(r), 0)
+ return re.pad(re.doExecute(newInputReader(r), 0, re.prog.NumCap))
}
const startSize = 10 // The size at which to start a slice in the 'All' routines.
diff --git a/libgo/go/regexp/syntax/compile.go b/libgo/go/regexp/syntax/compile.go
new file mode 100644
index 00000000000..c415d39a57e
--- /dev/null
+++ b/libgo/go/regexp/syntax/compile.go
@@ -0,0 +1,288 @@
+package syntax
+
+import (
+ "os"
+ "unicode"
+)
+
+// A patchList is a list of instruction pointers that need to be filled in (patched).
+// Because the pointers haven't been filled in yet, we can reuse their storage
+// to hold the list. It's kind of sleazy, but works well in practice.
+// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
+//
+// These aren't really pointers: they're integers, so we can reinterpret them
+// this way without using package unsafe. A value l denotes
+// p.Inst[l>>1].Out (l&1==0) or p.Inst[l>>1].Arg (l&1==1).
+// l == 0 denotes the empty list, okay because we start every program
+// with a fail instruction, so we'll never want to point at its output link.
+type patchList uint32
+
+func (l patchList) next(p *Prog) patchList { // next returns the list value stored in the slot that l denotes.
+ i := &p.Inst[l>>1]
+ if l&1 == 0 { // even l: the link lives in Out
+ return patchList(i.Out)
+ }
+ return patchList(i.Arg) // odd l: the link lives in Arg
+}
+
+func (l patchList) patch(p *Prog, val uint32) { // patch stores val in every slot on the list l.
+ for l != 0 {
+ i := &p.Inst[l>>1]
+ if l&1 == 0 {
+ l = patchList(i.Out) // fetch the link to the next slot before overwriting it
+ i.Out = val
+ } else {
+ l = patchList(i.Arg) // fetch the link to the next slot before overwriting it
+ i.Arg = val
+ }
+ }
+}
+
+func (l1 patchList) append(p *Prog, l2 patchList) patchList { // append links l2 behind the last slot of l1 and returns the combined list.
+ if l1 == 0 { // l1 is empty: the result is just l2
+ return l2
+ }
+ if l2 == 0 { // nothing to add
+ return l1
+ }
+
+ last := l1
+ for { // walk l1 until the slot whose link is 0
+ next := last.next(p)
+ if next == 0 {
+ break
+ }
+ last = next
+ }
+
+ i := &p.Inst[last>>1]
+ if last&1 == 0 {
+ i.Out = uint32(l2)
+ } else {
+ i.Arg = uint32(l2)
+ }
+ return l1
+}
+
+// A frag represents a compiled program fragment; the zero frag (i == 0) stands for a fragment that fails.
+type frag struct {
+ i uint32 // index of first instruction
+ out patchList // slots to be patched with the index of the instruction that follows the fragment
+}
+
+type compiler struct { // compiler carries the state of one Compile call.
+ p *Prog // program being built
+}
+
+// Compile compiles the regexp into a program to be executed; the returned error is always nil.
+// The regexp should have been simplified already (returned from re.Simplify).
+func Compile(re *Regexp) (*Prog, os.Error) {
+ var c compiler
+ c.init()
+ f := c.compile(re)
+ f.out.patch(c.p, c.inst(InstMatch).i) // all dangling exits of the fragment lead to a final InstMatch
+ c.p.Start = int(f.i) // execution starts at the fragment's first instruction
+ return c.p, nil
+}
+
+func (c *compiler) init() { // init allocates the program and emits its leading InstFail.
+ c.p = new(Prog)
+ c.p.NumCap = 2 // implicit ( and ) for whole match $0
+ c.inst(InstFail) // becomes instruction 0, so index 0 can double as the "failed fragment" marker
+}
+
+var anyRuneNotNL = []int{0, '\n' - 1, '\n' + 1, unicode.MaxRune} // rune list used for OpAnyCharNotNL: two lo-hi pairs that skip '\n'
+var anyRune = []int{0, unicode.MaxRune} // rune list used for OpAnyChar: one lo-hi pair spanning 0..MaxRune
+
+func (c *compiler) compile(re *Regexp) frag { // compile emits the instructions for re and returns the resulting fragment.
+ switch re.Op {
+ case OpNoMatch:
+ return c.fail()
+ case OpEmptyMatch:
+ return c.nop()
+ case OpLiteral:
+ if len(re.Rune) == 0 {
+ return c.nop()
+ }
+ var f frag
+ for j := range re.Rune { // one rune instruction per literal rune, chained with cat
+ f1 := c.rune(re.Rune[j:j+1], re.Flags)
+ if j == 0 {
+ f = f1
+ } else {
+ f = c.cat(f, f1)
+ }
+ }
+ return f
+ case OpCharClass:
+ return c.rune(re.Rune, re.Flags)
+ case OpAnyCharNotNL:
+ return c.rune(anyRuneNotNL, 0)
+ case OpAnyChar:
+ return c.rune(anyRune, 0)
+ case OpBeginLine:
+ return c.empty(EmptyBeginLine)
+ case OpEndLine:
+ return c.empty(EmptyEndLine)
+ case OpBeginText:
+ return c.empty(EmptyBeginText)
+ case OpEndText:
+ return c.empty(EmptyEndText)
+ case OpWordBoundary:
+ return c.empty(EmptyWordBoundary)
+ case OpNoWordBoundary:
+ return c.empty(EmptyNoWordBoundary)
+ case OpCapture:
+ bra := c.cap(uint32(re.Cap << 1)) // capture slot 2*Cap, emitted ahead of the group body
+ sub := c.compile(re.Sub[0])
+ ket := c.cap(uint32(re.Cap<<1 | 1)) // capture slot 2*Cap+1, emitted after the group body
+ return c.cat(c.cat(bra, sub), ket)
+ case OpStar:
+ return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
+ case OpPlus:
+ return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
+ case OpQuest:
+ return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
+ case OpConcat:
+ if len(re.Sub) == 0 {
+ return c.nop()
+ }
+ var f frag
+ for i, sub := range re.Sub {
+ if i == 0 {
+ f = c.compile(sub)
+ } else {
+ f = c.cat(f, c.compile(sub))
+ }
+ }
+ return f
+ case OpAlternate:
+ var f frag
+ for _, sub := range re.Sub {
+ f = c.alt(f, c.compile(sub)) // f starts as the zero (failed) frag, and alt of failure is the other side
+ }
+ return f
+ }
+ panic("regexp: unhandled case in compile") // reached for any Op without a case above, e.g. OpRepeat (presumably removed by Simplify - confirm)
+}
+
+func (c *compiler) inst(op InstOp) frag { // inst appends an instruction with the given op; the returned frag has only i set.
+ // TODO: impose length limit
+ f := frag{i: uint32(len(c.p.Inst))}
+ c.p.Inst = append(c.p.Inst, Inst{Op: op})
+ return f
+}
+
+func (c *compiler) nop() frag { // nop emits an InstNop whose Out link is left to be patched.
+ f := c.inst(InstNop)
+ f.out = patchList(f.i << 1) // even value: denotes the Out field of instruction f.i
+ return f
+}
+
+func (c *compiler) fail() frag { // fail emits nothing and returns the zero frag, which cat and alt treat as failure.
+ return frag{}
+}
+
+func (c *compiler) cap(arg uint32) frag { // cap emits an InstCapture with Arg set to arg and raises NumCap to at least arg+1.
+ f := c.inst(InstCapture)
+ f.out = patchList(f.i << 1)
+ c.p.Inst[f.i].Arg = arg
+
+ if c.p.NumCap < int(arg)+1 {
+ c.p.NumCap = int(arg) + 1
+ }
+ return f
+}
+
+func (c *compiler) cat(f1, f2 frag) frag { // cat returns the fragment that runs f1 followed by f2.
+ // concat of failure is failure
+ if f1.i == 0 || f2.i == 0 {
+ return frag{}
+ }
+
+ // TODO: elide nop
+
+ f1.out.patch(c.p, f2.i) // every exit of f1 now enters f2
+ return frag{f1.i, f2.out}
+}
+
+func (c *compiler) alt(f1, f2 frag) frag { // alt returns an InstAlt fragment branching to f1 (Out) and f2 (Arg).
+ // alt of failure is other
+ if f1.i == 0 {
+ return f2
+ }
+ if f2.i == 0 {
+ return f1
+ }
+
+ f := c.inst(InstAlt)
+ i := &c.p.Inst[f.i]
+ i.Out = f1.i
+ i.Arg = f2.i
+ f.out = f1.out.append(c.p, f2.out) // the exits of both branches remain to be patched
+ return f
+}
+
+func (c *compiler) quest(f1 frag, nongreedy bool) frag { // quest builds the fragment for OpQuest: an InstAlt that either enters f1 or bypasses it.
+ f := c.inst(InstAlt)
+ i := &c.p.Inst[f.i]
+ if nongreedy {
+ i.Arg = f1.i
+ f.out = patchList(f.i << 1) // Out is the bypass link, left for the caller to patch
+ } else {
+ i.Out = f1.i
+ f.out = patchList(f.i<<1 | 1) // Arg is the bypass link, left for the caller to patch
+ }
+ f.out = f.out.append(c.p, f1.out) // the exits of f1 leave the fragment as well
+ return f
+}
+
+func (c *compiler) star(f1 frag, nongreedy bool) frag { // star builds the fragment for OpStar: an InstAlt that enters f1, with f1 looping back to it.
+ f := c.inst(InstAlt)
+ i := &c.p.Inst[f.i]
+ if nongreedy {
+ i.Arg = f1.i
+ f.out = patchList(f.i << 1) // Out is the exit link, left for the caller to patch
+ } else {
+ i.Out = f1.i
+ f.out = patchList(f.i<<1 | 1) // Arg is the exit link, left for the caller to patch
+ }
+ f1.out.patch(c.p, f.i) // close the loop: exits of f1 return to the InstAlt
+ return f
+}
+
+func (c *compiler) plus(f1 frag, nongreedy bool) frag { // plus reuses the star loop but enters it at f1, so f1 runs before the first InstAlt decision.
+ return frag{f1.i, c.star(f1, nongreedy).out}
+}
+
+func (c *compiler) empty(op EmptyOp) frag { // empty emits an InstEmptyWidth that carries op in its Arg field.
+ f := c.inst(InstEmptyWidth)
+ c.p.Inst[f.i].Arg = uint32(op)
+ f.out = patchList(f.i << 1)
+ return f
+}
+
+func (c *compiler) rune(rune []int, flags Flags) frag { // rune emits an instruction for the given rune list; its Arg holds the FoldCase flag if still relevant.
+ f := c.inst(InstRune)
+ i := &c.p.Inst[f.i]
+ i.Rune = rune
+ flags &= FoldCase // only relevant flag is FoldCase
+ if len(rune) != 1 || unicode.SimpleFold(rune[0]) == rune[0] {
+ // and sometimes not even that: kept only for a single rune whose SimpleFold differs from itself
+ flags &^= FoldCase
+ }
+ i.Arg = uint32(flags)
+ f.out = patchList(f.i << 1)
+
+ // Special cases for exec machine.
+ switch {
+ case flags&FoldCase == 0 && (len(rune) == 1 || len(rune) == 2 && rune[0] == rune[1]):
+ i.Op = InstRune1 // a single rune (or a pair with equal ends) without folding
+ case len(rune) == 2 && rune[0] == 0 && rune[1] == unicode.MaxRune:
+ i.Op = InstRuneAny // same contents as anyRune
+ case len(rune) == 4 && rune[0] == 0 && rune[1] == '\n'-1 && rune[2] == '\n'+1 && rune[3] == unicode.MaxRune:
+ i.Op = InstRuneAnyNotNL // same contents as anyRuneNotNL
+ }
+
+ return f
+}
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
new file mode 100644
index 00000000000..7013459019c
--- /dev/null
+++ b/libgo/go/regexp/syntax/parse.go
@@ -0,0 +1,1861 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+import (
+ "os"
+ "sort"
+ "strings"
+ "unicode"
+ "utf8"
+)
+
+// An Error describes a failure to parse a regular expression
+// and gives the offending expression.
+type Error struct {
+ Code ErrorCode // kind of failure
+ Expr string // offending expression text, as passed by the parser
+}
+
+func (e *Error) String() string { // String returns the message: a fixed prefix, the code text, and Expr in backquotes.
+ return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
+}
+
+// An ErrorCode describes a failure to parse a regular expression; its value is the message text.
+type ErrorCode string
+
+const (
+ // Unexpected error
+ ErrInternalError ErrorCode = "regexp/syntax: internal error"
+
+ // Parse errors
+ ErrInvalidCharClass ErrorCode = "invalid character class"
+ ErrInvalidCharRange ErrorCode = "invalid character class range"
+ ErrInvalidEscape ErrorCode = "invalid escape sequence"
+ ErrInvalidNamedCapture ErrorCode = "invalid named capture"
+ ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"
+ ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator" // a repetition directly after another one, under PerlX
+ ErrInvalidRepeatSize ErrorCode = "invalid repeat count" // {min,max} with a count above 1000 or min > max
+ ErrInvalidUTF8 ErrorCode = "invalid UTF-8"
+ ErrMissingBracket ErrorCode = "missing closing ]"
+ ErrMissingParen ErrorCode = "missing closing )"
+ ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator" // repetition with nothing before it to repeat
+ ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
+)
+
+func (e ErrorCode) String() string { // String returns the code's message text.
+ return string(e)
+}
+
+// Flags control the behavior of the parser and record information about regexp context.
+type Flags uint16
+
+const (
+ FoldCase Flags = 1 << iota // case-insensitive match
+ Literal // treat pattern as literal string
+ ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline
+ DotNL // allow . to match newline
+ OneLine // treat ^ and $ as only matching at beginning and end of text
+ NonGreedy // make repetition operators default to non-greedy
+ PerlX // allow Perl extensions, e.g. (?...) groups, \A \b \B \Q...\E \z, and a '?' suffix on repetitions
+ UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation
+ WasDollar // regexp OpEndText was $, not \z
+ Simple // regexp contains no counted repetition
+
+ MatchNL = ClassNL | DotNL // allow both character classes and . to match newline
+
+ Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible
+ POSIX Flags = 0 // POSIX syntax
+)
+
+// Pseudo-ops for parsing stack; they are the only stack entries with Op >= opPseudo.
+const (
+ opLeftParen = opPseudo + iota // marks an open '(' on the stack
+ opVerticalBar // marks a '|' on the stack
+)
+
+type parser struct { // parser carries the state of one Parse call.
+ flags Flags // parse mode flags
+ stack []*Regexp // stack of parsed expressions
+ free *Regexp // head of the list of recycled nodes, linked through Sub0[0]
+ numCap int // number of capturing groups seen
+ wholeRegexp string // the complete pattern text given to Parse
+ tmpClass []int // temporary char class work space
+}
+
+func (p *parser) newRegexp(op Op) *Regexp { // newRegexp returns a zeroed node with Op set, recycled from the free list when one is available.
+ re := p.free
+ if re != nil {
+ p.free = re.Sub0[0] // unlink the node from the free list
+ *re = Regexp{}
+ } else {
+ re = new(Regexp)
+ }
+ re.Op = op
+ return re
+}
+
+func (p *parser) reuse(re *Regexp) { // reuse pushes re onto the free list, overwriting re.Sub0[0] with the link.
+ re.Sub0[0] = p.free
+ p.free = re
+}
+
+// Parse stack manipulation.
+
+// push pushes the regexp re onto the parse stack and returns it, or returns nil if maybeConcat stored the rune in a reused node instead.
+func (p *parser) push(re *Regexp) *Regexp {
+ if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
+ // Single rune.
+ if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
+ return nil
+ }
+ re.Op = OpLiteral
+ re.Rune = re.Rune[:1]
+ re.Flags = p.flags &^ FoldCase
+ } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
+ re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
+ unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
+ unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
+ re.Op == OpCharClass && len(re.Rune) == 2 &&
+ re.Rune[0]+1 == re.Rune[1] &&
+ unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
+ unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
+ // Case-insensitive rune like [Aa] or [Δδ]: two runes that fold only to each other.
+ if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
+ return nil
+ }
+
+ // Rewrite as (case-insensitive) literal.
+ re.Op = OpLiteral
+ re.Rune = re.Rune[:1]
+ re.Flags = p.flags | FoldCase
+ } else {
+ // Incremental concatenation.
+ p.maybeConcat(-1, 0)
+ }
+
+ p.stack = append(p.stack, re)
+ return re
+}
+
+// maybeConcat implements incremental concatenation
+// of literal runes into string nodes. The parser calls this
+// before each push, so only the top fragment of the stack
+// might need processing. Since this is called before a push,
+// the topmost literal is no longer subject to operators like *.
+// (Otherwise ab* would turn into (ab)*.)
+// If r >= 0 and there's a node left over, maybeConcat uses it
+// to push r with the given flags.
+// maybeConcat reports whether r was pushed.
+func (p *parser) maybeConcat(r int, flags Flags) bool {
+ n := len(p.stack)
+ if n < 2 {
+ return false
+ }
+
+ re1 := p.stack[n-1]
+ re2 := p.stack[n-2]
+ if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase { // only two literals with the same case-folding mode can merge
+ return false
+ }
+
+ // Push re1 into re2.
+ re2.Rune = append(re2.Rune, re1.Rune...)
+
+ // Reuse re1 if possible.
+ if r >= 0 {
+ re1.Rune = re1.Rune0[:1]
+ re1.Rune[0] = r
+ re1.Flags = flags
+ return true
+ }
+
+ p.stack = p.stack[:n-1]
+ p.reuse(re1)
+ return false // did not push r
+}
+
+// newLiteral returns a new OpLiteral Regexp for r with the given flags; under FoldCase, r is replaced by minFoldRune(r).
+func (p *parser) newLiteral(r int, flags Flags) *Regexp {
+ re := p.newRegexp(OpLiteral)
+ re.Flags = flags
+ if flags&FoldCase != 0 {
+ r = minFoldRune(r)
+ }
+ re.Rune0[0] = r
+ re.Rune = re.Rune0[:1] // the single rune lives in the node's inline storage
+ return re
+}
+
+// minFoldRune returns the minimum rune fold-equivalent to r.
+func minFoldRune(r int) int {
+ if r < MinFold || r > MaxFold { // runes outside [MinFold, MaxFold] are returned unchanged
+ return r
+ }
+ min := r
+ r0 := r
+ for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) { // follow SimpleFold until it cycles back to r0
+ if min > r {
+ min = r
+ }
+ }
+ return min
+}
+
+// literal pushes a literal regexp for the rune r on the stack,
+// using the parser's current flags. It does not return a value.
+func (p *parser) literal(r int) {
+ p.push(p.newLiteral(r, p.flags))
+}
+
+// op pushes a regexp with the given op and the parser's current flags
+// onto the stack and returns the result of push.
+func (p *parser) op(op Op) *Regexp {
+ re := p.newRegexp(op)
+ re.Flags = p.flags
+ return p.push(re)
+}
+
+// repeat replaces the top stack element with itself repeated according to op, min, max.
+// before is the regexp suffix starting at the repetition operator; after is the suffix
+// following it. lastRepeat is non-empty when the preceding token was itself a repetition.
+// repeat returns an updated 'after' (past a '?' suffix under PerlX) and an error, if any.
+func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, os.Error) {
+ flags := p.flags
+ if p.flags&PerlX != 0 {
+ if len(after) > 0 && after[0] == '?' {
+ after = after[1:]
+ flags ^= NonGreedy // a '?' suffix inverts the current greediness
+ }
+ if lastRepeat != "" {
+ // In Perl it is not allowed to stack repetition operators:
+ // a** is a syntax error, not a doubled star, and a++ means
+ // something else entirely, which we don't support!
+ return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]}
+ }
+ }
+ n := len(p.stack)
+ if n == 0 {
+ return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
+ }
+ sub := p.stack[n-1]
+ if sub.Op >= opPseudo { // the top of the stack is a '(' or '|' marker, not an operand
+ return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
+ }
+ re := p.newRegexp(op)
+ re.Min = min
+ re.Max = max
+ re.Flags = flags
+ re.Sub = re.Sub0[:1]
+ re.Sub[0] = sub
+ p.stack[n-1] = re
+ return after, nil
+}
+
+// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation and returns the result of push.
+func (p *parser) concat() *Regexp {
+ p.maybeConcat(-1, 0) // merge a pending pair of literals first
+
+ // Scan down to find pseudo-operator | or (.
+ i := len(p.stack)
+ for i > 0 && p.stack[i-1].Op < opPseudo {
+ i--
+ }
+ subs := p.stack[i:]
+ p.stack = p.stack[:i]
+
+ // Empty concatenation is special case.
+ if len(subs) == 0 {
+ return p.push(p.newRegexp(OpEmptyMatch))
+ }
+
+ return p.push(p.collapse(subs, OpConcat))
+}
+
+// alternate replaces the top of the stack (above the topmost '(') with its alternation and returns the result of push.
+func (p *parser) alternate() *Regexp {
+ // Scan down to find pseudo-operator (.
+ // There are no | above (.
+ i := len(p.stack)
+ for i > 0 && p.stack[i-1].Op < opPseudo {
+ i--
+ }
+ subs := p.stack[i:]
+ p.stack = p.stack[:i]
+
+ // Make sure top class is clean.
+ // All the others already are (see swapVerticalBar).
+ if len(subs) > 0 {
+ cleanAlt(subs[len(subs)-1])
+ }
+
+ // Empty alternate is special case
+ // (shouldn't happen but easy to handle).
+ if len(subs) == 0 {
+ return p.push(p.newRegexp(OpNoMatch))
+ }
+
+ return p.push(p.collapse(subs, OpAlternate))
+}
+
+// cleanAlt cleans re for eventual inclusion in an alternation; only OpCharClass nodes are changed.
+func cleanAlt(re *Regexp) {
+ switch re.Op {
+ case OpCharClass:
+ re.Rune = cleanClass(&re.Rune)
+ if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune { // class spans 0..MaxRune
+ re.Rune = nil
+ re.Op = OpAnyChar
+ return
+ }
+ if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune { // class spans everything but '\n'
+ re.Rune = nil
+ re.Op = OpAnyCharNotNL
+ return
+ }
+ if cap(re.Rune)-len(re.Rune) > 100 {
+ // re.Rune will not grow any more.
+ // Copy it (into Rune0 if it fits) to release the oversized backing array.
+ re.Rune = append(re.Rune0[:0], re.Rune...)
+ }
+ }
+}
+
+// collapse returns the result of applying op to subs.
+// If subs contains op nodes, they all get hoisted up
+// so that there is never a concat of a concat or an
+// alternate of an alternate.
+func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
+ if len(subs) == 1 { // a single operand is returned as is, without a wrapper node
+ return subs[0]
+ }
+ re := p.newRegexp(op)
+ re.Sub = re.Sub0[:0]
+ for _, sub := range subs {
+ if sub.Op == op {
+ re.Sub = append(re.Sub, sub.Sub...) // hoist the children of a nested op node
+ p.reuse(sub)
+ } else {
+ re.Sub = append(re.Sub, sub)
+ }
+ }
+ if op == OpAlternate {
+ re.Sub = p.factor(re.Sub, re.Flags)
+ if len(re.Sub) == 1 { // factoring left one alternative: drop the OpAlternate wrapper
+ old := re
+ re = re.Sub[0]
+ p.reuse(old)
+ }
+ }
+ return re
+}
+
+// factor factors common prefixes from the alternation list sub.
+// It returns a replacement list that reuses the same storage and
+// frees (passes to p.reuse) any removed *Regexps.
+//
+// For example,
+// ABC|ABD|AEF|BCX|BCY
+// simplifies by literal prefix extraction to
+// A(B(C|D)|EF)|BC(X|Y)
+// which simplifies by character class introduction to
+// A(B[CD]|EF)|BC[XY]
+//
+func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
+ if len(sub) < 2 {
+ return sub
+ }
+
+ // Round 1: Factor out common literal prefixes.
+ var str []int
+ var strflags Flags
+ start := 0
+ out := sub[:0]
+ for i := 0; i <= len(sub); i++ { // i == len(sub) is an extra pass that flushes the final run
+ // Invariant: the Regexps that were in sub[0:start] have been
+ // used or marked for reuse, and the slice space has been reused
+ // for out (len(out) <= start).
+ //
+ // Invariant: sub[start:i] consists of regexps that all begin
+ // with str as modified by strflags.
+ var istr []int
+ var iflags Flags
+ if i < len(sub) {
+ istr, iflags = p.leadingString(sub[i])
+ if iflags == strflags {
+ same := 0
+ for same < len(str) && same < len(istr) && str[same] == istr[same] {
+ same++
+ }
+ if same > 0 {
+ // Matches at least one rune in current range.
+ // Keep going around.
+ str = str[:same]
+ continue
+ }
+ }
+ }
+
+ // Found end of a run with common leading literal string:
+ // sub[start:i] all begin with str[0:len(str)], but sub[i]
+ // does not even begin with str[0].
+ //
+ // Factor out common string and append factored expression to out.
+ if i == start {
+ // Nothing to do - run of length 0.
+ } else if i == start+1 {
+ // Just one: don't bother factoring.
+ out = append(out, sub[start])
+ } else {
+ // Construct factored form: prefix(suffix1|suffix2|...)
+ prefix := p.newRegexp(OpLiteral)
+ prefix.Flags = strflags
+ prefix.Rune = append(prefix.Rune[:0], str...) // copy str, which refers to storage inside the alternatives
+
+ for j := start; j < i; j++ {
+ sub[j] = p.removeLeadingString(sub[j], len(str))
+ }
+ suffix := p.collapse(sub[start:i], OpAlternate) // recurse
+
+ re := p.newRegexp(OpConcat)
+ re.Sub = append(re.Sub[:0], prefix, suffix)
+ out = append(out, re)
+ }
+
+ // Prepare for next iteration.
+ start = i
+ str = istr
+ strflags = iflags
+ }
+ sub = out
+
+ // Round 2: Factor out common complex prefixes,
+ // just the first piece of each concatenation,
+ // whatever it is. This is good enough a lot of the time.
+ start = 0
+ out = sub[:0]
+ var first *Regexp
+ for i := 0; i <= len(sub); i++ { // i == len(sub) is an extra pass that flushes the final run
+ // Invariant: the Regexps that were in sub[0:start] have been
+ // used or marked for reuse, and the slice space has been reused
+ // for out (len(out) <= start).
+ //
+ // Invariant: sub[start:i] consists of regexps that all begin with first.
+ var ifirst *Regexp
+ if i < len(sub) {
+ ifirst = p.leadingRegexp(sub[i])
+ if first != nil && first.Equal(ifirst) {
+ continue
+ }
+ }
+
+ // Found end of a run with common leading regexp:
+ // sub[start:i] all begin with first but sub[i] does not.
+ //
+ // Factor out common regexp and append factored expression to out.
+ if i == start {
+ // Nothing to do - run of length 0.
+ } else if i == start+1 {
+ // Just one: don't bother factoring.
+ out = append(out, sub[start])
+ } else {
+ // Construct factored form: prefix(suffix1|suffix2|...)
+ prefix := first
+ for j := start; j < i; j++ {
+ reuse := j != start // prefix came from sub[start]
+ sub[j] = p.removeLeadingRegexp(sub[j], reuse)
+ }
+ suffix := p.collapse(sub[start:i], OpAlternate) // recurse
+
+ re := p.newRegexp(OpConcat)
+ re.Sub = append(re.Sub[:0], prefix, suffix)
+ out = append(out, re)
+ }
+
+ // Prepare for next iteration.
+ start = i
+ first = ifirst
+ }
+ sub = out
+
+ // Round 3: Collapse runs of single literals into character classes.
+ start = 0
+ out = sub[:0]
+ for i := 0; i <= len(sub); i++ { // i == len(sub) is an extra pass that flushes the final run
+ // Invariant: the Regexps that were in sub[0:start] have been
+ // used or marked for reuse, and the slice space has been reused
+ // for out (len(out) <= start).
+ //
+ // Invariant: sub[start:i] consists of regexps that are either
+ // literal runes or character classes.
+ if i < len(sub) && isCharClass(sub[i]) {
+ continue
+ }
+
+ // sub[i] is not a char or char class;
+ // emit char class for sub[start:i]...
+ if i == start {
+ // Nothing to do - run of length 0.
+ } else if i == start+1 {
+ out = append(out, sub[start])
+ } else {
+ // Make new char class.
+ // Start with most complex regexp in sub[start].
+ max := start
+ for j := start + 1; j < i; j++ {
+ if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
+ max = j
+ }
+ }
+ sub[start], sub[max] = sub[max], sub[start]
+
+ for j := start + 1; j < i; j++ {
+ mergeCharClass(sub[start], sub[j])
+ p.reuse(sub[j])
+ }
+ cleanAlt(sub[start])
+ out = append(out, sub[start])
+ }
+
+ // ... and then emit sub[i].
+ if i < len(sub) {
+ out = append(out, sub[i])
+ }
+ start = i + 1
+ }
+ sub = out
+
+ // Round 4: Collapse runs of empty matches into a single empty match.
+ start = 0 // reset like the other rounds, but not read in this round
+ out = sub[:0]
+ for i := range sub {
+ if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch { // drop all but the last of adjacent empty matches
+ continue
+ }
+ out = append(out, sub[i])
+ }
+ sub = out
+
+ return sub
+}
+
+// leadingString returns the leading literal string that re begins with and its FoldCase flag, or nil, 0 if there is none.
+// The string refers to storage in re or its children.
+func (p *parser) leadingString(re *Regexp) ([]int, Flags) {
+ if re.Op == OpConcat && len(re.Sub) > 0 { // for a concatenation, inspect its first element
+ re = re.Sub[0]
+ }
+ if re.Op != OpLiteral {
+ return nil, 0
+ }
+ return re.Rune, re.Flags & FoldCase
+}
+
+// removeLeadingString removes the first n leading runes
+// from the beginning of re, in place. It returns the replacement for re.
+func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
+ if re.Op == OpConcat && len(re.Sub) > 0 {
+ // Removing a leading string in a concatenation
+ // might simplify the concatenation.
+ sub := re.Sub[0]
+ sub = p.removeLeadingString(sub, n)
+ re.Sub[0] = sub
+ if sub.Op == OpEmptyMatch { // the first element was consumed entirely: drop it from the concatenation
+ p.reuse(sub)
+ switch len(re.Sub) {
+ case 0, 1:
+ // Impossible but handle.
+ re.Op = OpEmptyMatch
+ re.Sub = nil
+ case 2:
+ old := re
+ re = re.Sub[1]
+ p.reuse(old)
+ default:
+ copy(re.Sub, re.Sub[1:])
+ re.Sub = re.Sub[:len(re.Sub)-1]
+ }
+ }
+ return re
+ }
+
+ if re.Op == OpLiteral {
+ re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])] // shift the remaining runes to the front
+ if len(re.Rune) == 0 {
+ re.Op = OpEmptyMatch
+ }
+ }
+ return re
+}
+
+// leadingRegexp returns the leading regexp that re begins with, or nil if re is or begins with an empty match.
+// The regexp refers to storage in re or its children.
+func (p *parser) leadingRegexp(re *Regexp) *Regexp {
+ if re.Op == OpEmptyMatch {
+ return nil
+ }
+ if re.Op == OpConcat && len(re.Sub) > 0 {
+ sub := re.Sub[0]
+ if sub.Op == OpEmptyMatch {
+ return nil
+ }
+ return sub
+ }
+ return re // anything other than a concatenation is its own leading regexp
+}
+
+// removeLeadingRegexp removes the leading regexp in re.
+// It returns the replacement for re, which is an empty match when nothing is left.
+// If reuse is true, it passes the removed regexp (if no longer needed) to p.reuse.
+func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
+ if re.Op == OpConcat && len(re.Sub) > 0 {
+ if reuse {
+ p.reuse(re.Sub[0])
+ }
+ re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])] // shift the remaining elements to the front
+ switch len(re.Sub) {
+ case 0:
+ re.Op = OpEmptyMatch
+ re.Sub = nil
+ case 1:
+ old := re
+ re = re.Sub[0]
+ p.reuse(old)
+ }
+ return re
+ }
+ if reuse {
+ p.reuse(re)
+ }
+ return p.newRegexp(OpEmptyMatch)
+}
+
+func literalRegexp(s string, flags Flags) *Regexp { // literalRegexp returns a new OpLiteral node holding the runes of s, with the given flags.
+ re := &Regexp{Op: OpLiteral}
+ re.Flags = flags
+ re.Rune = re.Rune0[:0] // use local storage for small strings
+ for _, c := range s {
+ if len(re.Rune) >= cap(re.Rune) {
+ // string is too long to fit in Rune0. let Go handle it
+ re.Rune = []int(s)
+ break
+ }
+ re.Rune = append(re.Rune, c)
+ }
+ return re
+}
+
+// Parsing.
+
+func Parse(s string, flags Flags) (*Regexp, os.Error) { // Parse parses the regular expression s under the given flags and returns its syntax tree.
+ if flags&Literal != 0 {
+ // Trivial parser for literal string.
+ if err := checkUTF8(s); err != nil {
+ return nil, err
+ }
+ return literalRegexp(s, flags), nil
+ }
+
+ // Otherwise, must do real work.
+ var (
+ p parser
+ err os.Error
+ c int
+ op Op
+ lastRepeat string
+ min, max int
+ )
+ p.flags = flags
+ p.wholeRegexp = s
+ t := s // t is the part of s that has not been consumed yet
+ for t != "" {
+ repeat := "" // set to the suffix starting at the operator when this token is a repetition
+ BigSwitch:
+ switch t[0] {
+ default:
+ if c, t, err = nextRune(t); err != nil {
+ return nil, err
+ }
+ p.literal(c)
+
+ case '(':
+ if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {
+ // Flag changes and non-capturing groups.
+ if t, err = p.parsePerlFlags(t); err != nil {
+ return nil, err
+ }
+ break
+ }
+ p.numCap++
+ p.op(opLeftParen).Cap = p.numCap
+ t = t[1:]
+ case '|':
+ if err = p.parseVerticalBar(); err != nil {
+ return nil, err
+ }
+ t = t[1:]
+ case ')':
+ if err = p.parseRightParen(); err != nil {
+ return nil, err
+ }
+ t = t[1:]
+ case '^':
+ if p.flags&OneLine != 0 {
+ p.op(OpBeginText)
+ } else {
+ p.op(OpBeginLine)
+ }
+ t = t[1:]
+ case '$':
+ if p.flags&OneLine != 0 {
+ p.op(OpEndText).Flags |= WasDollar
+ } else {
+ p.op(OpEndLine)
+ }
+ t = t[1:]
+ case '.':
+ if p.flags&DotNL != 0 {
+ p.op(OpAnyChar)
+ } else {
+ p.op(OpAnyCharNotNL)
+ }
+ t = t[1:]
+ case '[':
+ if t, err = p.parseClass(t); err != nil {
+ return nil, err
+ }
+ case '*', '+', '?':
+ before := t
+ switch t[0] {
+ case '*':
+ op = OpStar
+ case '+':
+ op = OpPlus
+ case '?':
+ op = OpQuest
+ }
+ after := t[1:]
+ if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil { // min and max are the outer variables, never assigned, so zero here
+ return nil, err
+ }
+ repeat = before
+ t = after
+ case '{':
+ op = OpRepeat
+ before := t
+ min, max, after, ok := p.parseRepeat(t) // := declares min and max local to this case
+ if !ok {
+ // If the repeat cannot be parsed, { is a literal.
+ p.literal('{')
+ t = t[1:]
+ break
+ }
+ if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
+ // Numbers were too big, or max is present and min > max.
+ return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
+ }
+ if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
+ return nil, err
+ }
+ repeat = before
+ t = after
+ case '\\':
+ if p.flags&PerlX != 0 && len(t) >= 2 {
+ switch t[1] {
+ case 'A':
+ p.op(OpBeginText)
+ t = t[2:]
+ break BigSwitch
+ case 'b':
+ p.op(OpWordBoundary)
+ t = t[2:]
+ break BigSwitch
+ case 'B':
+ p.op(OpNoWordBoundary)
+ t = t[2:]
+ break BigSwitch
+ case 'C':
+ // any byte; not supported
+ return nil, &Error{ErrInvalidEscape, t[:2]}
+ case 'Q':
+ // \Q ... \E: the ... is always literals
+ var lit string
+ if i := strings.Index(t, `\E`); i < 0 { // no closing \E: the literal runs to the end of the pattern
+ lit = t[2:]
+ t = ""
+ } else {
+ lit = t[2:i]
+ t = t[i+2:]
+ }
+ p.push(literalRegexp(lit, p.flags))
+ break BigSwitch
+ case 'z':
+ p.op(OpEndText)
+ t = t[2:]
+ break BigSwitch
+ }
+ }
+
+ re := p.newRegexp(OpCharClass)
+ re.Flags = p.flags
+
+ // Look for Unicode character group like \p{Han}
+ if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
+ r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])
+ if err != nil {
+ return nil, err
+ }
+ if r != nil {
+ re.Rune = r
+ t = rest
+ p.push(re)
+ break BigSwitch
+ }
+ }
+
+ // Perl character class escape.
+ if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
+ re.Rune = r
+ t = rest
+ p.push(re)
+ break BigSwitch
+ }
+ p.reuse(re) // no class escape matched: return the unused node to the free list
+
+ // Ordinary single-character escape.
+ if c, t, err = p.parseEscape(t); err != nil {
+ return nil, err
+ }
+ p.literal(c)
+ }
+ lastRepeat = repeat // remembered so the next repeat call can reject stacked operators
+ }
+
+ p.concat()
+ if p.swapVerticalBar() {
+ // pop vertical bar
+ p.stack = p.stack[:len(p.stack)-1]
+ }
+ p.alternate()
+
+ n := len(p.stack)
+ if n != 1 { // anything left besides the single result is reported as a missing ')'
+ return nil, &Error{ErrMissingParen, s}
+ }
+ return p.stack[0], nil
+}
+
+// parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max} at the start of s; rest is the text after the closing }.
+// If s is not of that form, it returns ok == false.
+// If s has the right form but the values are too big, it returns min == -1, ok == true.
+func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
+ if s == "" || s[0] != '{' {
+ return
+ }
+ s = s[1:]
+ var ok1 bool
+ if min, s, ok1 = p.parseInt(s); !ok1 {
+ return
+ }
+ if s == "" {
+ return
+ }
+ if s[0] != ',' {
+ max = min // {min}: exactly min repetitions
+ } else {
+ s = s[1:]
+ if s == "" {
+ return
+ }
+ if s[0] == '}' {
+ max = -1 // {min,}: no upper bound
+ } else if max, s, ok1 = p.parseInt(s); !ok1 {
+ return
+ } else if max < 0 {
+ // parseInt found too big a number
+ min = -1
+ }
+ }
+ if s == "" || s[0] != '}' {
+ return
+ }
+ rest = s[1:]
+ ok = true
+ return
+}
+
+// parsePerlFlags parses a Perl flag setting or non-capturing group or both,
+// like (?i) or (?: or (?i:. It removes the prefix from s and updates the parse state.
+// It also accepts the named-capture opener (?P<name>, which starts a capturing group.
+// The caller must have ensured that s begins with "(?".
+//
+// On success it returns the remainder of s after the consumed prefix.
+// On failure it returns an *Error with code ErrInvalidNamedCapture or
+// ErrInvalidPerlOp, or the error reported by checkUTF8 / nextRune.
+func (p *parser) parsePerlFlags(s string) (rest string, err os.Error) {
+ t := s
+
+ // Check for named captures, first introduced in Python's regexp library.
+ // As usual, there are three slightly different syntaxes:
+ //
+ // (?P<name>expr) the original, introduced by Python
+ // (?<name>expr) the .NET alteration, adopted by Perl 5.10
+ // (?'name'expr) another .NET alteration, adopted by Perl 5.10
+ //
+ // Perl 5.10 gave in and implemented the Python version too,
+ // but they claim that the last two are the preferred forms.
+ // PCRE and languages based on it (specifically, PHP and Ruby)
+ // support all three as well. EcmaScript 4 uses only the Python form.
+ //
+ // In both the open source world (via Code Search) and the
+ // Google source tree, (?P<name>expr) is the dominant form,
+ // so that's the one we implement. One is enough.
+ if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
+ // Pull out name.
+ end := strings.IndexRune(t, '>')
+ if end < 0 {
+ // No closing '>': prefer reporting invalid UTF-8 if that is the real problem.
+ if err = checkUTF8(t); err != nil {
+ return "", err
+ }
+ return "", &Error{ErrInvalidNamedCapture, s}
+ }
+
+ capture := t[:end+1] // "(?P<name>"
+ name := t[4:end] // "name"
+ if err = checkUTF8(name); err != nil {
+ return "", err
+ }
+ if !isValidCaptureName(name) {
+ return "", &Error{ErrInvalidNamedCapture, capture}
+ }
+
+ // Like ordinary capture, but named.
+ p.numCap++
+ re := p.op(opLeftParen)
+ re.Cap = p.numCap
+ re.Name = name
+ return t[end+1:], nil
+ }
+
+ // Non-capturing group. Might also twiddle Perl flags.
+ var c int
+ t = t[2:] // skip (?
+ flags := p.flags
+ sign := +1
+ sawFlag := false
+Loop:
+ for t != "" {
+ if c, t, err = nextRune(t); err != nil {
+ return "", err
+ }
+ switch c {
+ default:
+ // Unknown flag character: fall out to the ErrInvalidPerlOp return.
+ break Loop
+
+ // Flags.
+ case 'i':
+ flags |= FoldCase
+ sawFlag = true
+ case 'm':
+ flags &^= OneLine
+ sawFlag = true
+ case 's':
+ flags |= DotNL
+ sawFlag = true
+ case 'U':
+ flags |= NonGreedy
+ sawFlag = true
+
+ // Switch to negation.
+ case '-':
+ if sign < 0 {
+ // A second '-' is not allowed.
+ break Loop
+ }
+ sign = -1
+ // Invert flags so that | above turn into &^ and vice versa.
+ // We'll invert flags again before using it below.
+ flags = ^flags
+ // Require at least one flag after the '-'.
+ sawFlag = false
+
+ // End of flags, starting group or not.
+ case ':', ')':
+ if sign < 0 {
+ if !sawFlag {
+ break Loop
+ }
+ flags = ^flags
+ }
+ if c == ':' {
+ // Open new group
+ p.op(opLeftParen)
+ }
+ p.flags = flags
+ return t, nil
+ }
+ }
+
+ // Report the portion of s consumed so far as the offending text.
+ return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]}
+}
+
+// isValidCaptureName reports whether name may be used as a capture
+// group name, i.e. whether it matches [A-Za-z0-9_]+.
+// Unlike PCRE there is no 32-byte length limit, and unlike Python
+// a leading digit is accepted.
+func isValidCaptureName(name string) bool {
+	valid := name != ""
+	for _, c := range name {
+		if c == '_' || isalnum(c) {
+			continue
+		}
+		valid = false
+		break
+	}
+	return valid
+}
+
+// parseInt reads an unsigned decimal integer from the front of s.
+// ok is false if s does not begin with a digit or if the number has
+// a leading zero followed by another digit.  Otherwise rest is the
+// text after the digits and n is the value, or -1 if the number is
+// too large to accumulate safely.
+func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
+	isDigit := func(b byte) bool { return '0' <= b && b <= '9' }
+
+	if s == "" || !isDigit(s[0]) {
+		return
+	}
+	// Leading zeros are rejected.
+	if len(s) >= 2 && s[0] == '0' && isDigit(s[1]) {
+		return
+	}
+
+	// Find the end of the digit run.
+	end := 0
+	for end < len(s) && isDigit(s[end]) {
+		end++
+	}
+	digits := s[:end]
+	rest, ok = s[end:], true
+
+	// Accumulate the value, bailing out before it can overflow.
+	for i := 0; i < len(digits); i++ {
+		if n >= 1e8 {
+			n = -1
+			break
+		}
+		n = n*10 + int(digits[i]) - '0'
+	}
+	return
+}
+
+// isCharClass reports whether re can be represented as a character
+// class: a literal consisting of a single rune, a character class,
+// any-character-except-newline, or any-character.
+func isCharClass(re *Regexp) bool {
+	switch re.Op {
+	case OpLiteral:
+		return len(re.Rune) == 1
+	case OpCharClass, OpAnyCharNotNL, OpAnyChar:
+		return true
+	}
+	return false
+}
+
+// matchRune reports whether the single-character expression re matches r.
+// It understands single-rune literals, character classes (stored as
+// lo,hi pairs in re.Rune), any-char-but-newline and any-char; every
+// other kind of expression is reported as not matching.
+func matchRune(re *Regexp, r int) bool {
+	if re.Op == OpAnyChar {
+		return true
+	}
+	if re.Op == OpAnyCharNotNL {
+		return r != '\n'
+	}
+	if re.Op == OpLiteral {
+		return len(re.Rune) == 1 && re.Rune[0] == r
+	}
+	if re.Op != OpCharClass {
+		return false
+	}
+
+	pairs := re.Rune
+	for i := 0; i < len(pairs); i += 2 {
+		if r < pairs[i] {
+			continue
+		}
+		if r <= pairs[i+1] {
+			return true
+		}
+	}
+	return false
+}
+
+// parseVerticalBar processes a | in the input.
+// It first collapses the concatenation parsed so far.  If that
+// concatenation sits directly above an opVerticalBar, the two are
+// swapped so the concatenation joins the pending alternation below
+// the bar; otherwise a fresh opVerticalBar is pushed.
+// The returned error is always nil.
+func (p *parser) parseVerticalBar() os.Error {
+	p.concat()
+	if swapped := p.swapVerticalBar(); !swapped {
+		p.op(opVerticalBar)
+	}
+	return nil
+}
+
+// mergeCharClass updates dst in place so that it matches dst|src.
+// To keep copying down, the caller must pass the operands so that
+// dst.Op >= src.Op.
+func mergeCharClass(dst, src *Regexp) {
+	if dst.Op == OpAnyChar {
+		// Already matches everything.
+		return
+	}
+	if dst.Op == OpAnyCharNotNL {
+		// The only thing src can contribute is \n.
+		if matchRune(src, '\n') {
+			dst.Op = OpAnyChar
+		}
+		return
+	}
+	if dst.Op == OpCharClass {
+		// src is no more complex than dst: a literal or a class.
+		if src.Op == OpLiteral {
+			dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
+			return
+		}
+		dst.Rune = appendClass(dst.Rune, src.Rune)
+		return
+	}
+	if dst.Op == OpLiteral {
+		// Two literals: identical ones need no work, otherwise
+		// dst is rebuilt as a class holding both.
+		if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
+			return
+		}
+		own := dst.Rune[0]
+		dst.Op = OpCharClass
+		dst.Rune = appendLiteral(dst.Rune[:0], own, dst.Flags)
+		dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
+	}
+}
+
+// swapVerticalBar inspects the top of the parse stack.  When the element
+// directly below the top is an opVerticalBar, the two are exchanged so
+// the bar ends up on top, and true is returned.  As a shortcut, when the
+// elements on both sides of the bar can each be expressed as a character
+// class, they are merged into a single class instead (the bar still ends
+// up on top).  In every other situation the stack is left untouched and
+// the result is false.
+func (p *parser) swapVerticalBar() bool {
+	n := len(p.stack)
+	if n < 2 || p.stack[n-2].Op != opVerticalBar {
+		return false
+	}
+	top, bar := p.stack[n-1], p.stack[n-2]
+
+	if n >= 3 && isCharClass(top) && isCharClass(p.stack[n-3]) {
+		// Fold the simpler operand into the more complex one,
+		// which stays on the stack beneath the bar.
+		small, big := top, p.stack[n-3]
+		if small.Op > big.Op {
+			small, big = big, small
+			p.stack[n-3] = big
+		}
+		mergeCharClass(big, small)
+		p.reuse(small)
+		p.stack = p.stack[:n-1]
+		return true
+	}
+
+	if n >= 3 {
+		// The element below the bar is about to move out of reach;
+		// clean it opportunistically.
+		cleanAlt(p.stack[n-3])
+	}
+	p.stack[n-2], p.stack[n-1] = top, bar
+	return true
+}
+
+// parseRightParen handles a ) in the input.
+// It finishes the concatenation and alternation parsed since the
+// matching opLeftParen, pops both from the stack, restores the flags
+// saved on the paren marker, and pushes either the bare sub-expression
+// (non-capturing group) or an OpCapture wrapping it.
+//
+// It returns an *Error with code ErrMissingParen, carrying the whole
+// regular expression, when there is no opLeftParen to close.
+func (p *parser) parseRightParen() os.Error {
+	p.concat()
+	if p.swapVerticalBar() {
+		// pop vertical bar
+		p.stack = p.stack[:len(p.stack)-1]
+	}
+	p.alternate()
+
+	n := len(p.stack)
+	if n < 2 {
+		// Nothing sits below the top of the stack, so there is no
+		// opLeftParen for this ) to close.  That is a mistake in the
+		// input, not an internal failure, so report it the same way
+		// as the mismatched-marker case below.
+		return &Error{ErrMissingParen, p.wholeRegexp}
+	}
+	re1 := p.stack[n-1]
+	re2 := p.stack[n-2]
+	p.stack = p.stack[:n-2]
+	if re2.Op != opLeftParen {
+		return &Error{ErrMissingParen, p.wholeRegexp}
+	}
+	// Restore flags at time of paren.
+	p.flags = re2.Flags
+	if re2.Cap == 0 {
+		// Just for grouping.
+		p.push(re1)
+	} else {
+		// Reuse the paren marker as the capture node.
+		re2.Op = OpCapture
+		re2.Sub = re2.Sub0[:1]
+		re2.Sub[0] = re1
+		p.push(re2)
+	}
+	return nil
+}
+
+// parseEscape parses an escape sequence at the beginning of s
+// and returns the rune.
+// s is expected to start with the backslash; the first byte is skipped unexamined.
+// On success rest is the text following the escape.
+// On failure it returns an *Error with code ErrTrailingBackslash or
+// ErrInvalidEscape, or the error reported by nextRune.
+func (p *parser) parseEscape(s string) (r int, rest string, err os.Error) {
+ t := s[1:]
+ if t == "" {
+ return 0, "", &Error{ErrTrailingBackslash, ""}
+ }
+ c, t, err := nextRune(t)
+ if err != nil {
+ return 0, "", err
+ }
+
+ // Every path that breaks out of Switch ends at the ErrInvalidEscape return below.
+Switch:
+ switch c {
+ default:
+ if c < utf8.RuneSelf && !isalnum(c) {
+ // Escaped non-word characters are always themselves.
+ // PCRE is not quite so rigorous: it accepts things like
+ // \q, but we don't. We once rejected \_, but too many
+ // programs and people insist on using it, so allow \_.
+ return c, t, nil
+ }
+
+ // Octal escapes.
+ case '1', '2', '3', '4', '5', '6', '7':
+ // Single non-zero digit is a backreference; not supported
+ if t == "" || t[0] < '0' || t[0] > '7' {
+ break
+ }
+ fallthrough
+ case '0':
+ // Consume up to three octal digits; already have one.
+ r = c - '0'
+ for i := 1; i < 3; i++ {
+ if t == "" || t[0] < '0' || t[0] > '7' {
+ break
+ }
+ r = r*8 + int(t[0]) - '0'
+ t = t[1:]
+ }
+ return r, t, nil
+
+ // Hexadecimal escapes.
+ case 'x':
+ if t == "" {
+ break
+ }
+ if c, t, err = nextRune(t); err != nil {
+ return 0, "", err
+ }
+ if c == '{' {
+ // Any number of digits in braces.
+ // Perl accepts any text at all; it ignores all text
+ // after the first non-hex digit. We require only hex digits,
+ // and at least one.
+ nhex := 0
+ r = 0
+ for {
+ if t == "" {
+ break Switch
+ }
+ if c, t, err = nextRune(t); err != nil {
+ return 0, "", err
+ }
+ if c == '}' {
+ break
+ }
+ v := unhex(c)
+ if v < 0 {
+ break Switch
+ }
+ r = r*16 + v
+ // Checked on every digit, so r cannot grow without bound.
+ if r > unicode.MaxRune {
+ break Switch
+ }
+ nhex++
+ }
+ if nhex == 0 {
+ break Switch
+ }
+ return r, t, nil
+ }
+
+ // Easy case: two hex digits.
+ x := unhex(c)
+ if c, t, err = nextRune(t); err != nil {
+ return 0, "", err
+ }
+ y := unhex(c)
+ if x < 0 || y < 0 {
+ break
+ }
+ return x*16 + y, t, nil
+
+ // C escapes. There is no case 'b', to avoid misparsing
+ // the Perl word-boundary \b as the C backspace \b
+ // when in POSIX mode. In Perl, /\b/ means word-boundary
+ // but /[\b]/ means backspace. We don't support that.
+ // If you want a backspace, embed a literal backspace
+ // character or use \x08.
+ case 'a':
+ return '\a', t, err
+ case 'f':
+ return '\f', t, err
+ case 'n':
+ return '\n', t, err
+ case 'r':
+ return '\r', t, err
+ case 't':
+ return '\t', t, err
+ case 'v':
+ return '\v', t, err
+ }
+ // The reported text is the part of s consumed so far.
+ return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]}
+}
+
+// parseClassChar reads one character of a character class from the
+// start of s and returns it together with the remaining text.
+// A leading backslash is handled as an ordinary escape sequence, even
+// though many characters need no escaping inside a class.  If s is
+// empty the class was never closed, and the error quotes wholeClass.
+func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err os.Error) {
+	switch {
+	case s == "":
+		return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
+	case s[0] == '\\':
+		return p.parseEscape(s)
+	}
+	return nextRune(s)
+}
+
+// charGroup describes a predefined character group, as stored in the
+// perlGroup and posixGroup tables and consumed by appendGroup.
+type charGroup struct {
+ sign int // negative: appendGroup appends the negation of class; zero is treated by the lookups as "no such group"
+ class []int // rune ranges handed to appendClass / appendNegatedClass / appendFoldedClass
+}
+
+// parsePerlClassEscape looks for a Perl character class escape such as
+// \d at the start of s.  When the PerlX flag is set and the two-byte
+// escape is a known group, the group's characters are appended to r and
+// the grown slice is returned along with the text after the escape.
+// Otherwise both results are zero values.
+func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string) {
+	if p.flags&PerlX != 0 && len(s) >= 2 && s[0] == '\\' {
+		if g := perlGroup[s[:2]]; g.sign != 0 {
+			out, rest = p.appendGroup(r, g), s[2:]
+		}
+	}
+	return
+}
+
+// parseNamedClass looks for a POSIX named character class such as
+// [:alnum:] at the start of s.  If s does not start with "[:" or has
+// no closing ":]", all results are zero values.  If the bracketed name
+// is unknown, an ErrInvalidCharRange error quoting it is returned.
+// Otherwise the class's characters are appended to r and the grown
+// slice is returned along with the text after the closing ":]".
+func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err os.Error) {
+	if len(s) < 2 || s[:2] != "[:" {
+		return nil, "", nil
+	}
+
+	end := strings.Index(s[2:], ":]")
+	if end < 0 {
+		return nil, "", nil
+	}
+	// Account for the skipped "[:" and step past the closing ":]".
+	end += 4
+
+	name := s[:end]
+	g := posixGroup[name]
+	if g.sign == 0 {
+		return nil, "", &Error{ErrInvalidCharRange, name}
+	}
+	return p.appendGroup(r, g), s[end:], nil
+}
+
+// appendGroup appends the characters of group g to the class r and
+// returns the result.  A negative g.sign appends the negation of the
+// group.  Under FoldCase the group is first case-folded and cleaned in
+// the parser's scratch buffer p.tmpClass.
+func (p *parser) appendGroup(r []int, g charGroup) []int {
+	class := g.class
+	if p.flags&FoldCase != 0 {
+		p.tmpClass = appendFoldedClass(p.tmpClass[:0], g.class)
+		class = cleanClass(&p.tmpClass)
+	}
+	if g.sign < 0 {
+		return appendNegatedClass(r, class)
+	}
+	return appendClass(r, class)
+}
+
+// anyTable is the range table returned for the Unicode class name "Any":
+// it covers every code point from 0 through unicode.MaxRune.
+// The fields are keyed so the literal keeps compiling if
+// unicode.RangeTable or its range types gain fields.
+var anyTable = &unicode.RangeTable{
+	R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
+	R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
+}
+
+// unicodeTable looks up the unicode.RangeTable called name, together
+// with the table of additional fold-equivalent code points for it.
+// "Any" is special-cased to mean every code point.  Categories are
+// consulted before scripts; an unknown name yields nil, nil.
+func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
+	if name == "Any" {
+		return anyTable, anyTable
+	}
+	tab, fold := unicode.Categories[name], unicode.FoldCategory[name]
+	if tab == nil {
+		tab, fold = unicode.Scripts[name], unicode.FoldScript[name]
+	}
+	if tab == nil {
+		return nil, nil
+	}
+	return tab, fold
+}
+
+// parseUnicodeClass parses a leading Unicode character class like \p{Han}
+// from the beginning of s. If one is present, it appends the characters to r
+// and returns the new slice r and the remainder of the string.
+// If the UnicodeGroups flag is not set, or s does not begin with \p or \P,
+// it returns zero values with a nil error.
+// An unknown class name or a missing closing brace yields an *Error with
+// code ErrInvalidCharRange; invalid UTF-8 is reported via checkUTF8 / nextRune.
+func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, err os.Error) {
+ if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
+ return
+ }
+
+ // Committed to parse or return error.
+ sign := +1
+ if s[1] == 'P' {
+ sign = -1
+ }
+ t := s[2:]
+ c, t, err := nextRune(t)
+ if err != nil {
+ return
+ }
+ // seq is the whole escape as written (used in error messages);
+ // name is the class name inside it.
+ var seq, name string
+ if c != '{' {
+ // Single-letter name.
+ seq = s[:len(s)-len(t)]
+ name = seq[2:]
+ } else {
+ // Name is in braces.
+ end := strings.IndexRune(s, '}')
+ if end < 0 {
+ if err = checkUTF8(s); err != nil {
+ return
+ }
+ return nil, "", &Error{ErrInvalidCharRange, s}
+ }
+ seq, t = s[:end+1], s[end+1:]
+ name = s[3:end]
+ if err = checkUTF8(name); err != nil {
+ return
+ }
+ }
+
+ // Group can have leading negation too. \p{^Han} == \P{Han}, \P{^Han} == \p{Han}.
+ if name != "" && name[0] == '^' {
+ sign = -sign
+ name = name[1:]
+ }
+
+ tab, fold := unicodeTable(name)
+ if tab == nil {
+ return nil, "", &Error{ErrInvalidCharRange, seq}
+ }
+
+ if p.flags&FoldCase == 0 || fold == nil {
+ // No folding needed: append the table (or its negation) directly.
+ if sign > 0 {
+ r = appendTable(r, tab)
+ } else {
+ r = appendNegatedTable(r, tab)
+ }
+ } else {
+ // Merge and clean tab and fold in a temporary buffer.
+ // This is necessary for the negative case and just tidy
+ // for the positive case.
+ tmp := p.tmpClass[:0]
+ tmp = appendTable(tmp, tab)
+ tmp = appendTable(tmp, fold)
+ p.tmpClass = tmp
+ tmp = cleanClass(&p.tmpClass)
+ if sign > 0 {
+ r = appendClass(r, tmp)
+ } else {
+ r = appendNegatedClass(r, tmp)
+ }
+ }
+ return r, t, nil
+}
+
+// parseClass parses a character class at the beginning of s
+// and pushes it onto the parse stack.
+// s is expected to begin with '['; the first byte is skipped unexamined.
+// On success it returns the text following the closing ']'.
+// Errors come from the helpers it calls, or are *Error values with code
+// ErrInvalidCharRange (bad '-' placement in POSIX mode, or a range whose
+// end is below its start).
+func (p *parser) parseClass(s string) (rest string, err os.Error) {
+ t := s[1:] // chop [
+ re := p.newRegexp(OpCharClass)
+ re.Flags = p.flags
+ re.Rune = re.Rune0[:0]
+
+ sign := +1
+ if t != "" && t[0] == '^' {
+ sign = -1
+ t = t[1:]
+
+ // If character class does not match \n, add it here,
+ // so that negation later will do the right thing.
+ if p.flags&ClassNL == 0 {
+ re.Rune = append(re.Rune, '\n', '\n')
+ }
+ }
+
+ class := re.Rune
+ first := true // ] and - are okay as first char in class
+ // An empty t keeps the loop going so that parseClassChar below
+ // reports the missing closing bracket.
+ for t == "" || t[0] != ']' || first {
+ // POSIX: - is only okay unescaped as first or last in class.
+ // Perl: - is okay anywhere.
+ if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
+ _, size := utf8.DecodeRuneInString(t[1:])
+ return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
+ }
+ first = false
+
+ // Look for POSIX [:alnum:] etc.
+ if len(t) > 2 && t[0] == '[' && t[1] == ':' {
+ nclass, nt, err := p.parseNamedClass(t, class)
+ if err != nil {
+ return "", err
+ }
+ if nclass != nil {
+ class, t = nclass, nt
+ continue
+ }
+ }
+
+ // Look for Unicode character group like \p{Han}.
+ nclass, nt, err := p.parseUnicodeClass(t, class)
+ if err != nil {
+ return "", err
+ }
+ if nclass != nil {
+ class, t = nclass, nt
+ continue
+ }
+
+ // Look for Perl character class symbols (extension).
+ if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {
+ class, t = nclass, nt
+ continue
+ }
+
+ // Single character or simple range.
+ rng := t // start of the range text, kept for error reporting
+ var lo, hi int
+ if lo, t, err = p.parseClassChar(t, s); err != nil {
+ return "", err
+ }
+ hi = lo
+ // [a-] means (a|-) so check for final ].
+ if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
+ t = t[1:]
+ if hi, t, err = p.parseClassChar(t, s); err != nil {
+ return "", err
+ }
+ if hi < lo {
+ rng = rng[:len(rng)-len(t)]
+ return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
+ }
+ }
+ if p.flags&FoldCase == 0 {
+ class = AppendRange(class, lo, hi)
+ } else {
+ class = appendFoldedRange(class, lo, hi)
+ }
+ }
+ t = t[1:] // chop ]
+
+ // Use &re.Rune instead of &class to avoid allocation.
+ re.Rune = class
+ class = cleanClass(&re.Rune)
+ if sign < 0 {
+ class = negateClass(class)
+ }
+ re.Rune = class
+ p.push(re)
+ return t, nil
+}
+
+// cleanClass puts the class *rp into canonical form: its ranges
+// (stored as consecutive lo,hi pairs) are sorted, and ranges that
+// overlap or abut are merged.  The class is rewritten in place and
+// the shortened slice is returned.
+func cleanClass(rp *[]int) []int {
+	// Order by lo ascending; for equal lo, the larger hi comes first.
+	sort.Sort(ranges{rp})
+
+	cls := *rp
+	if len(cls) < 2 {
+		return cls
+	}
+
+	// cls[:out] holds the merged result so far; its last range
+	// ends at cls[out-1].
+	out := 2
+	for in := 2; in < len(cls); in += 2 {
+		lo, hi := cls[in], cls[in+1]
+		if lo > cls[out-1]+1 {
+			// Gap before this range: start a new output range.
+			cls[out], cls[out+1] = lo, hi
+			out += 2
+		} else if hi > cls[out-1] {
+			// Overlaps or abuts the previous range: extend it.
+			cls[out-1] = hi
+		}
+	}
+	return cls[:out]
+}
+
+// appendLiteral appends the single rune x to the class r and returns
+// the result.  When flags includes FoldCase, the case-folded
+// equivalents of x are appended as well.
+func appendLiteral(r []int, x int, flags Flags) []int {
+	if flags&FoldCase == 0 {
+		return AppendRange(r, x, x)
+	}
+	return appendFoldedRange(r, x, x)
+}
+
+// AppendRange appends the range lo-hi to the class r and returns the
+// result.  If the new range overlaps or abuts the last or the
+// next-to-last range already in r, that range is widened in place
+// instead of growing the slice.  Looking at two ranges pays off when
+// appending case-folded alphabets, where one range tracks A-Z while
+// the other tracks a-z.
+func AppendRange(r []int, lo, hi int) []int {
+	n := len(r)
+	for back := 1; back <= 2; back++ {
+		at := n - 2*back
+		if at < 0 {
+			// Fewer than back ranges present.
+			break
+		}
+		rlo, rhi := r[at], r[at+1]
+		if lo > rhi+1 || rlo > hi+1 {
+			// Disjoint and not adjacent.
+			continue
+		}
+		if lo < rlo {
+			r[at] = lo
+		}
+		if hi > rhi {
+			r[at+1] = hi
+		}
+		return r
+	}
+	return append(r, lo, hi)
+}
+
+const (
+ // minimum and maximum runes involved in folding.
+ // checked during test.
+ //
+ // appendFoldedRange uses these bounds to skip case folding for
+ // ranges (or parts of ranges) lying outside [MinFold, MaxFold].
+ MinFold = 0x0041
+ MaxFold = 0x1044f
+)
+
+// appendFoldedRange appends the range lo-hi to the class r, along with
+// every rune that is case-folding-equivalent to a rune in that range,
+// and returns the result.
+func appendFoldedRange(r []int, lo, hi int) []int {
+	// Fast paths: folding cannot add anything when the range already
+	// spans every foldable rune, or when it contains none of them.
+	coversAll := lo <= MinFold && hi >= MaxFold
+	outside := hi < MinFold || lo > MaxFold
+	if coversAll || outside {
+		return AppendRange(r, lo, hi)
+	}
+
+	// Peel off the parts below MinFold and above MaxFold unfolded.
+	if lo < MinFold {
+		r = AppendRange(r, lo, MinFold-1)
+		lo = MinFold
+	}
+	if hi > MaxFold {
+		r = AppendRange(r, MaxFold+1, hi)
+		hi = MaxFold
+	}
+
+	// Walk the rest rune by rune, following each rune's fold orbit
+	// until it returns to the start.  AppendRange coalesces as we go.
+	for c := lo; c <= hi; c++ {
+		r = AppendRange(r, c, c)
+		for f := unicode.SimpleFold(c); f != c; f = unicode.SimpleFold(f) {
+			r = AppendRange(r, f, f)
+		}
+	}
+	return r
+}
+
+// appendClass appends every range of the class x to the class r and
+// returns the result.  It assumes x is clean.
+func appendClass(r []int, x []int) []int {
+	for rem := x; len(rem) > 0; rem = rem[2:] {
+		r = AppendRange(r, rem[0], rem[1])
+	}
+	return r
+}
+
+// appendFoldedClass appends the case folding of every range of the
+// class x to the class r and returns the result.
+func appendFoldedClass(r []int, x []int) []int {
+	for rem := x; len(rem) > 0; rem = rem[2:] {
+		r = appendFoldedRange(r, rem[0], rem[1])
+	}
+	return r
+}
+
+// appendNegatedClass appends to the class r every rune from 0 through
+// unicode.MaxRune that is NOT in the class x, and returns the result.
+// It assumes x is clean.
+func appendNegatedClass(r []int, x []int) []int {
+	start := 0 // first rune not yet known to be inside x
+	for i := 0; i < len(x); i += 2 {
+		lo, hi := x[i], x[i+1]
+		if gapEnd := lo - 1; start <= gapEnd {
+			r = AppendRange(r, start, gapEnd)
+		}
+		start = hi + 1
+	}
+	if start > unicode.MaxRune {
+		return r
+	}
+	return AppendRange(r, start, unicode.MaxRune)
+}
+
+// appendTable appends every code point listed in the range table x
+// (first its 16-bit ranges, then its 32-bit ranges) to the class r
+// and returns the result.
+func appendTable(r []int, x *unicode.RangeTable) []int {
+	// add appends one table entry: a solid range when stride is 1,
+	// otherwise each code point of the strided sequence on its own.
+	add := func(lo, hi, stride int) {
+		if stride == 1 {
+			r = AppendRange(r, lo, hi)
+			return
+		}
+		for c := lo; c <= hi; c += stride {
+			r = AppendRange(r, c, c)
+		}
+	}
+
+	for _, xr := range x.R16 {
+		add(int(xr.Lo), int(xr.Hi), int(xr.Stride))
+	}
+	for _, xr := range x.R32 {
+		add(int(xr.Lo), int(xr.Hi), int(xr.Stride))
+	}
+	return r
+}
+
+// appendNegatedTable appends to the class r every rune from 0 through
+// unicode.MaxRune that is NOT listed in the range table x, and returns
+// the result.  The table's 16-bit ranges are walked first, then its
+// 32-bit ranges.
+func appendNegatedTable(r []int, x *unicode.RangeTable) []int {
+	next := 0 // low end of the next gap to add
+
+	// skip records that lo-hi belongs to x: the gap before it (if any)
+	// is appended and the next gap starts just past hi.
+	skip := func(lo, hi int) {
+		if next <= lo-1 {
+			r = AppendRange(r, next, lo-1)
+		}
+		next = hi + 1
+	}
+	// exclude handles one table entry, solid or strided.
+	exclude := func(lo, hi, stride int) {
+		if stride == 1 {
+			skip(lo, hi)
+			return
+		}
+		for c := lo; c <= hi; c += stride {
+			skip(c, c)
+		}
+	}
+
+	for _, xr := range x.R16 {
+		exclude(int(xr.Lo), int(xr.Hi), int(xr.Stride))
+	}
+	for _, xr := range x.R32 {
+		exclude(int(xr.Lo), int(xr.Hi), int(xr.Stride))
+	}
+	if next <= unicode.MaxRune {
+		r = AppendRange(r, next, unicode.MaxRune)
+	}
+	return r
+}
+
+// negateClass replaces the class r with its complement over
+// 0 through unicode.MaxRune, reusing r's storage, and returns the
+// result.  It assumes r is already clean.
+func negateClass(r []int) []int {
+	next := 0 // low end of the next gap
+	out := 0  // write position in r
+	for i := 0; i < len(r); i += 2 {
+		lo, hi := r[i], r[i+1]
+		if lo-1 >= next {
+			r[out], r[out+1] = next, lo-1
+			out += 2
+		}
+		next = hi + 1
+	}
+	r = r[:out]
+	if next > unicode.MaxRune {
+		return r
+	}
+	// The complement can hold one range more than the original
+	// class did - this trailing one - so it has to be appended.
+	return append(r, next, unicode.MaxRune)
+}
+
+// ranges implements sort.Interface over a class stored as a []int of
+// consecutive lo,hi pairs; element i of the sort is the pair at
+// slice positions 2*i and 2*i+1.
+// Holding a *[]int looks odd, but callers already have one, so
+// wrapping it avoids an allocation.
+type ranges struct {
+	p *[]int
+}
+
+// Less orders pairs by lo ascending and, for equal lo, by hi descending.
+func (ra ranges) Less(i, j int) bool {
+	p := *ra.p
+	a, b := 2*i, 2*j
+	if p[a] != p[b] {
+		return p[a] < p[b]
+	}
+	return p[a+1] > p[b+1]
+}
+
+// Len returns the number of lo,hi pairs.
+func (ra ranges) Len() int {
+	return len(*ra.p) / 2
+}
+
+// Swap exchanges pair i with pair j.
+func (ra ranges) Swap(i, j int) {
+	p := *ra.p
+	a, b := 2*i, 2*j
+	p[a], p[b] = p[b], p[a]
+	p[a+1], p[b+1] = p[b+1], p[a+1]
+}
+
+// checkUTF8 verifies that s is valid UTF-8.  At the first invalid
+// byte it returns an ErrInvalidUTF8 error whose Expr is the rest of
+// s starting at that byte; otherwise it returns nil.
+func checkUTF8(s string) os.Error {
+	for rest := s; len(rest) > 0; {
+		r, n := utf8.DecodeRuneInString(rest)
+		if n == 1 && r == utf8.RuneError {
+			return &Error{Code: ErrInvalidUTF8, Expr: rest}
+		}
+		rest = rest[n:]
+	}
+	return nil
+}
+
+// nextRune decodes the first rune of s, returning it along with the
+// text that follows.  If s starts with an invalid UTF-8 byte, it
+// returns an ErrInvalidUTF8 error quoting s.
+func nextRune(s string) (c int, t string, err os.Error) {
+	r, n := utf8.DecodeRuneInString(s)
+	if n == 1 && r == utf8.RuneError {
+		return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
+	}
+	return r, s[n:], nil
+}
+
+// isalnum reports whether c is an ASCII letter or an ASCII digit.
+func isalnum(c int) bool {
+	switch {
+	case '0' <= c && c <= '9':
+		return true
+	case 'A' <= c && c <= 'Z':
+		return true
+	case 'a' <= c && c <= 'z':
+		return true
+	}
+	return false
+}
+
+// unhex returns the value of the hexadecimal digit c (either case),
+// or -1 if c is not a hexadecimal digit.
+func unhex(c int) int {
+	switch {
+	case '0' <= c && c <= '9':
+		return c - '0'
+	case 'a' <= c && c <= 'f':
+		return c - 'a' + 10
+	case 'A' <= c && c <= 'F':
+		return c - 'A' + 10
+	}
+	return -1
+}
diff --git a/libgo/go/regexp/syntax/parse_test.go b/libgo/go/regexp/syntax/parse_test.go
new file mode 100644
index 00000000000..5d9085bfb1f
--- /dev/null
+++ b/libgo/go/regexp/syntax/parse_test.go
@@ -0,0 +1,552 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax_test
+
+import (
+ . "regexp/syntax"
+ "bytes"
+ "fmt"
+ "testing"
+ "unicode"
+)
+
+// parseTest pairs a regular expression with the dump string that
+// parsing it is expected to produce (see testParseDump).
+type parseTest struct {
+ Regexp string
+ Dump string
+}
+
+var parseTests = []parseTest{
+ // Base cases
+ {`a`, `lit{a}`},
+ {`a.`, `cat{lit{a}dot{}}`},
+ {`a.b`, `cat{lit{a}dot{}lit{b}}`},
+ {`ab`, `str{ab}`},
+ {`a.b.c`, `cat{lit{a}dot{}lit{b}dot{}lit{c}}`},
+ {`abc`, `str{abc}`},
+ {`a|^`, `alt{lit{a}bol{}}`},
+ {`a|b`, `cc{0x61-0x62}`},
+ {`(a)`, `cap{lit{a}}`},
+ {`(a)|b`, `alt{cap{lit{a}}lit{b}}`},
+ {`a*`, `star{lit{a}}`},
+ {`a+`, `plus{lit{a}}`},
+ {`a?`, `que{lit{a}}`},
+ {`a{2}`, `rep{2,2 lit{a}}`},
+ {`a{2,3}`, `rep{2,3 lit{a}}`},
+ {`a{2,}`, `rep{2,-1 lit{a}}`},
+ {`a*?`, `nstar{lit{a}}`},
+ {`a+?`, `nplus{lit{a}}`},
+ {`a??`, `nque{lit{a}}`},
+ {`a{2}?`, `nrep{2,2 lit{a}}`},
+ {`a{2,3}?`, `nrep{2,3 lit{a}}`},
+ {`a{2,}?`, `nrep{2,-1 lit{a}}`},
+ // Malformed { } are treated as literals.
+ {`x{1001`, `str{x{1001}`},
+ {`x{9876543210`, `str{x{9876543210}`},
+ {`x{9876543210,`, `str{x{9876543210,}`},
+ {`x{2,1`, `str{x{2,1}`},
+ {`x{1,9876543210`, `str{x{1,9876543210}`},
+ {``, `emp{}`},
+ {`|`, `emp{}`}, // alt{emp{}emp{}} but got factored
+ {`|x|`, `alt{emp{}lit{x}emp{}}`},
+ {`.`, `dot{}`},
+ {`^`, `bol{}`},
+ {`$`, `eol{}`},
+ {`\|`, `lit{|}`},
+ {`\(`, `lit{(}`},
+ {`\)`, `lit{)}`},
+ {`\*`, `lit{*}`},
+ {`\+`, `lit{+}`},
+ {`\?`, `lit{?}`},
+ {`{`, `lit{{}`},
+ {`}`, `lit{}}`},
+ {`\.`, `lit{.}`},
+ {`\^`, `lit{^}`},
+ {`\$`, `lit{$}`},
+ {`\\`, `lit{\}`},
+ {`[ace]`, `cc{0x61 0x63 0x65}`},
+ {`[abc]`, `cc{0x61-0x63}`},
+ {`[a-z]`, `cc{0x61-0x7a}`},
+ {`[a]`, `lit{a}`},
+ {`\-`, `lit{-}`},
+ {`-`, `lit{-}`},
+ {`\_`, `lit{_}`},
+ {`abc`, `str{abc}`},
+ {`abc|def`, `alt{str{abc}str{def}}`},
+ {`abc|def|ghi`, `alt{str{abc}str{def}str{ghi}}`},
+
+ // Posix and Perl extensions
+ {`[[:lower:]]`, `cc{0x61-0x7a}`},
+ {`[a-z]`, `cc{0x61-0x7a}`},
+ {`[^[:lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
+ {`[[:^lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
+ {`(?i)[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
+ {`(?i)[a-z]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
+ {`(?i)[^[:lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
+ {`(?i)[[:^lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
+ {`\d`, `cc{0x30-0x39}`},
+ {`\D`, `cc{0x0-0x2f 0x3a-0x10ffff}`},
+ {`\s`, `cc{0x9-0xa 0xc-0xd 0x20}`},
+ {`\S`, `cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}`},
+ {`\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}`},
+ {`\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}`},
+ {`(?i)\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}`},
+ {`(?i)\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
+ {`[^\\]`, `cc{0x0-0x5b 0x5d-0x10ffff}`},
+ // { `\C`, `byte{}` }, // probably never
+
+ // Unicode, negatives, and a double negative.
+ {`\p{Braille}`, `cc{0x2800-0x28ff}`},
+ {`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
+ {`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
+ {`\P{^Braille}`, `cc{0x2800-0x28ff}`},
+ {`\pZ`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
+ {`[\p{Braille}]`, `cc{0x2800-0x28ff}`},
+ {`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
+ {`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
+ {`[\P{^Braille}]`, `cc{0x2800-0x28ff}`},
+ {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
+ {`\p{Lu}`, mkCharClass(unicode.IsUpper)},
+ {`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
+ {`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
+ {`\p{Any}`, `dot{}`},
+ {`\p{^Any}`, `cc{}`},
+
+ // Hex, octal.
+ {`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`},
+ {`[\x{41}-\x7a]\x61`, `cat{cc{0x41-0x7a}lit{a}}`},
+
+ // More interesting regular expressions.
+ {`a{,2}`, `str{a{,2}}`},
+ {`\.\^\$\\`, `str{.^$\}`},
+ {`[a-zABC]`, `cc{0x41-0x43 0x61-0x7a}`},
+ {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
+ {`[α-ε☺]`, `cc{0x3b1-0x3b5 0x263a}`}, // utf-8
+ {`a*{`, `cat{star{lit{a}}lit{{}}`},
+
+ // Test precedences
+ {`(?:ab)*`, `star{str{ab}}`},
+ {`(ab)*`, `star{cap{str{ab}}}`},
+ {`ab|cd`, `alt{str{ab}str{cd}}`},
+ {`a(b|c)d`, `cat{lit{a}cap{cc{0x62-0x63}}lit{d}}`},
+
+ // Test flattening.
+ {`(?:a)`, `lit{a}`},
+ {`(?:ab)(?:cd)`, `str{abcd}`},
+ {`(?:a+b+)(?:c+d+)`, `cat{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
+ {`(?:a+|b+)|(?:c+|d+)`, `alt{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
+ {`(?:a|b)|(?:c|d)`, `cc{0x61-0x64}`},
+ {`a|.`, `dot{}`},
+ {`.|a`, `dot{}`},
+ {`(?:[abc]|A|Z|hello|world)`, `alt{cc{0x41 0x5a 0x61-0x63}str{hello}str{world}}`},
+ {`(?:[abc]|A|Z)`, `cc{0x41 0x5a 0x61-0x63}`},
+
+ // Test Perl quoted literals
+ {`\Q+|*?{[\E`, `str{+|*?{[}`},
+ {`\Q+\E+`, `plus{lit{+}}`},
+ {`\Q\\E`, `lit{\}`},
+ {`\Q\\\E`, `str{\\}`},
+
+ // Test Perl \A and \z
+ {`(?m)^`, `bol{}`},
+ {`(?m)$`, `eol{}`},
+ {`(?-m)^`, `bot{}`},
+ {`(?-m)$`, `eot{}`},
+ {`(?m)\A`, `bot{}`},
+ {`(?m)\z`, `eot{\z}`},
+ {`(?-m)\A`, `bot{}`},
+ {`(?-m)\z`, `eot{\z}`},
+
+ // Test named captures
+ {`(?P<name>a)`, `cap{name:lit{a}}`},
+
+ // Case-folded literals
+ {`[Aa]`, `litfold{A}`},
+ {`[\x{100}\x{101}]`, `litfold{Ā}`},
+ {`[Δδ]`, `litfold{Δ}`},
+
+ // Strings
+ {`abcde`, `str{abcde}`},
+ {`[Aa][Bb]cd`, `cat{strfold{AB}str{cd}}`},
+
+ // Factoring.
+ {`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`},
+ {`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}cc{0x79-0x7a}}cat{plus{lit{y}}lit{w}}}}`},
+
+ // Bug fixes.
+ {`(?:.)`, `dot{}`},
+ {`(?:x|(?:xa))`, `cat{lit{x}alt{emp{}lit{a}}}`},
+ {`(?:.|(?:.a))`, `cat{dot{}alt{emp{}lit{a}}}`},
+ {`(?:A(?:A|a))`, `cat{lit{A}litfold{A}}`},
+ {`(?:A|a)`, `litfold{A}`},
+ {`A|(?:A|a)`, `litfold{A}`},
+ {`(?s).`, `dot{}`},
+ {`(?-s).`, `dnl{}`},
+ {`(?:(?:^).)`, `cat{bol{}dot{}}`},
+ {`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`},
+
+ // RE2 prefix_tests
+ {`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`},
+ {`a(?:b)c|abd`, `cat{str{ab}cc{0x63-0x64}}`},
+ {`abc|abd|aef|bcx|bcy`,
+ `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}` +
+ `cat{str{bc}cc{0x78-0x79}}}`},
+ {`abc|x|abd`, `alt{str{abc}lit{x}str{abd}}`},
+ {`(?i)abc|ABD`, `cat{strfold{AB}cc{0x43-0x44 0x63-0x64}}`},
+ {`[ab]c|[ab]d`, `cat{cc{0x61-0x62}cc{0x63-0x64}}`},
+ {`(?:xx|yy)c|(?:xx|yy)d`,
+ `cat{alt{str{xx}str{yy}}cc{0x63-0x64}}`},
+ {`x{2}|x{2}[0-9]`,
+ `cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
+ {`x{2}y|x{2}[0-9]y`,
+ `cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
+}
+
+const testFlags = MatchNL | PerlX | UnicodeGroups
+
+// TestParseSimple runs the parseTests table through testParseDump
+// using testFlags.
+func TestParseSimple(t *testing.T) {
+ testParseDump(t, parseTests, testFlags)
+}
+
+var foldcaseTests = []parseTest{
+ {`AbCdE`, `strfold{ABCDE}`},
+ {`[Aa]`, `litfold{A}`},
+ {`a`, `litfold{A}`},
+
+ // 0x17F is an old English long s (looks like an f) and folds to s.
+ // 0x212A is the Kelvin symbol and folds to k.
+ {`A[F-g]`, `cat{litfold{A}cc{0x41-0x7a 0x17f 0x212a}}`}, // [Aa][A-z...]
+ {`[[:upper:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
+ {`[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
+}
+
+// TestParseFoldCase runs the foldcaseTests table through testParseDump
+// with only the FoldCase flag set.
+func TestParseFoldCase(t *testing.T) {
+ testParseDump(t, foldcaseTests, FoldCase)
+}
+
+var literalTests = []parseTest{
+ {"(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}"},
+}
+
+// TestParseLiteral runs the literalTests table through testParseDump
+// with only the Literal flag set.
+func TestParseLiteral(t *testing.T) {
+ testParseDump(t, literalTests, Literal)
+}
+
+var matchnlTests = []parseTest{
+ {`.`, `dot{}`},
+ {"\n", "lit{\n}"},
+ {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
+ {`[a\n]`, `cc{0xa 0x61}`},
+}
+
+// TestParseMatchNL runs the matchnlTests table through testParseDump
+// with only the MatchNL flag set.
+func TestParseMatchNL(t *testing.T) {
+ testParseDump(t, matchnlTests, MatchNL)
+}
+
+var nomatchnlTests = []parseTest{
+ {`.`, `dnl{}`},
+ {"\n", "lit{\n}"},
+ {`[^a]`, `cc{0x0-0x9 0xb-0x60 0x62-0x10ffff}`},
+ {`[a\n]`, `cc{0xa 0x61}`},
+}
+
+// TestParseNoMatchNL runs the nomatchnlTests table through testParseDump
+// with no flags set.
+func TestParseNoMatchNL(t *testing.T) {
+ testParseDump(t, nomatchnlTests, 0)
+}
+
+// testParseDump parses each test's Regexp with the given flags and reports
+// an error if parsing fails or if the dumped syntax tree differs from the
+// test's expected Dump string.
+func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
+	for i := range tests {
+		tc := &tests[i]
+		re, err := Parse(tc.Regexp, flags)
+		if err != nil {
+			t.Errorf("Parse(%#q): %v", tc.Regexp, err)
+			continue
+		}
+		if got := dump(re); got != tc.Dump {
+			t.Errorf("Parse(%#q).Dump() = %#q want %#q", tc.Regexp, got, tc.Dump)
+		}
+	}
+}
+
+// dump returns a string representation of the regexp showing
+// the structure explicitly. It is dumpRegexp's output collected
+// into a string.
+func dump(re *Regexp) string {
+ var b bytes.Buffer
+ dumpRegexp(&b, re)
+ return b.String()
+}
+
+var opNames = []string{
+ OpNoMatch: "no",
+ OpEmptyMatch: "emp",
+ OpLiteral: "lit",
+ OpCharClass: "cc",
+ OpAnyCharNotNL: "dnl",
+ OpAnyChar: "dot",
+ OpBeginLine: "bol",
+ OpEndLine: "eol",
+ OpBeginText: "bot",
+ OpEndText: "eot",
+ OpWordBoundary: "wb",
+ OpNoWordBoundary: "nwb",
+ OpCapture: "cap",
+ OpStar: "star",
+ OpPlus: "plus",
+ OpQuest: "que",
+ OpRepeat: "rep",
+ OpConcat: "cat",
+ OpAlternate: "alt",
+}
+
+// dumpRegexp writes an encoding of the syntax tree for the regexp re to b.
+// It is used during testing to distinguish between parses that might print
+// the same using re's String method.
+//
+// Each node is written as name{contents}: the name comes from opNames
+// (with the adjustments below) and the contents depend on the operator.
+func dumpRegexp(b *bytes.Buffer, re *Regexp) {
+ // First the node name.
+ if int(re.Op) >= len(opNames) || opNames[re.Op] == "" {
+ // No name registered for this operator: fall back to its number.
+ fmt.Fprintf(b, "op%d", re.Op)
+ } else {
+ switch re.Op {
+ default:
+ b.WriteString(opNames[re.Op])
+ case OpStar, OpPlus, OpQuest, OpRepeat:
+ // Non-greedy repetitions get an "n" prefix, e.g. nstar.
+ if re.Flags&NonGreedy != 0 {
+ b.WriteByte('n')
+ }
+ b.WriteString(opNames[re.Op])
+ case OpLiteral:
+ // Multi-rune literals are "str", all others are "lit".
+ if len(re.Rune) > 1 {
+ b.WriteString("str")
+ } else {
+ b.WriteString("lit")
+ }
+ // Add "fold" only if the FoldCase flag is set and at least one
+ // rune actually has a different case-folded form.
+ if re.Flags&FoldCase != 0 {
+ for _, r := range re.Rune {
+ if unicode.SimpleFold(r) != r {
+ b.WriteString("fold")
+ break
+ }
+ }
+ }
+ }
+ }
+ // Then the contents, wrapped in braces.
+ b.WriteByte('{')
+ switch re.Op {
+ case OpEndText:
+ // Mark an end-of-text that did not come from '$'.
+ if re.Flags&WasDollar == 0 {
+ b.WriteString(`\z`)
+ }
+ case OpLiteral:
+ for _, r := range re.Rune {
+ b.WriteRune(r)
+ }
+ case OpConcat, OpAlternate:
+ for _, sub := range re.Sub {
+ dumpRegexp(b, sub)
+ }
+ case OpStar, OpPlus, OpQuest:
+ dumpRegexp(b, re.Sub[0])
+ case OpRepeat:
+ fmt.Fprintf(b, "%d,%d ", re.Min, re.Max)
+ dumpRegexp(b, re.Sub[0])
+ case OpCapture:
+ // Named captures are written as name:sub.
+ if re.Name != "" {
+ b.WriteString(re.Name)
+ b.WriteByte(':')
+ }
+ dumpRegexp(b, re.Sub[0])
+ case OpCharClass:
+ // Rune holds lo, hi pairs; they are written space-separated,
+ // with a single-rune range collapsed to one hex value.
+ sep := ""
+ for i := 0; i < len(re.Rune); i += 2 {
+ b.WriteString(sep)
+ sep = " "
+ lo, hi := re.Rune[i], re.Rune[i+1]
+ if lo == hi {
+ fmt.Fprintf(b, "%#x", lo)
+ } else {
+ fmt.Fprintf(b, "%#x-%#x", lo, hi)
+ }
+ }
+ }
+ b.WriteByte('}')
+}
+
+// mkCharClass builds the character class holding every rune in
+// [0, unicode.MaxRune] for which f returns true and returns its dump
+// string. Consecutive matching runes are merged into one lo, hi range.
+func mkCharClass(f func(int) bool) string {
+	var class []int
+	start := -1 // first rune of the run in progress, or -1 if none
+	for r := 0; r <= unicode.MaxRune; r++ {
+		switch in := f(r); {
+		case in && start < 0:
+			start = r
+		case !in && start >= 0:
+			class = append(class, start, r-1)
+			start = -1
+		}
+	}
+	if start >= 0 {
+		// The final run reaches the top of the rune space.
+		class = append(class, start, unicode.MaxRune)
+	}
+	return dump(&Regexp{Op: OpCharClass, Rune: class})
+}
+
+// isUpperFold reports whether rune, or any rune reached from it by
+// repeated unicode.SimpleFold, is upper case.
+func isUpperFold(rune int) bool {
+	if unicode.IsUpper(rune) {
+		return true
+	}
+	// Walk the fold orbit until it cycles back to the starting rune.
+	for c := unicode.SimpleFold(rune); c != rune; c = unicode.SimpleFold(c) {
+		if unicode.IsUpper(c) {
+			return true
+		}
+	}
+	return false
+}
+
+// TestFoldConstants checks that MinFold and MaxFold equal the smallest
+// and largest runes whose unicode.SimpleFold differs from the rune itself.
+func TestFoldConstants(t *testing.T) {
+	first, last := -1, -1
+	for r := 0; r <= unicode.MaxRune; r++ {
+		if unicode.SimpleFold(r) == r {
+			continue
+		}
+		if first < 0 {
+			first = r
+		}
+		last = r
+	}
+	if first >= 0 && MinFold != first {
+		t.Errorf("MinFold=%#U should be %#U", MinFold, first)
+	}
+	if MaxFold != last {
+		t.Errorf("MaxFold=%#U should be %#U", MaxFold, last)
+	}
+}
+
+// TestAppendRangeCollapse adds A, a, B, b, ..., Z, z one rune at a time
+// and checks that the result is just the two ranges A-Z and a-z.
+func TestAppendRangeCollapse(t *testing.T) {
+	// AppendRange should collapse each of the new ranges
+	// into the earlier ones (it looks back two ranges), so that
+	// the slice never grows very large.
+	// Note that we are not calling cleanClass.
+	const toLower = 'a' - 'A'
+	var r []int
+	for c := 'A'; c <= 'Z'; c++ {
+		r = AppendRange(r, c, c)
+		r = AppendRange(r, c+toLower, c+toLower)
+	}
+	if got := string(r); got != "AZaz" {
+		t.Errorf("AppendRange interlaced A-Z a-z = %s, want AZaz", got)
+	}
+}
+
+var invalidRegexps = []string{
+ `(`,
+ `)`,
+ `(a`,
+ `(a|b|`,
+ `(a|b`,
+ `[a-z`,
+ `([a-z)`,
+ `x{1001}`,
+ `x{9876543210}`,
+ `x{2,1}`,
+ `x{1,9876543210}`,
+ "\xff", // Invalid UTF-8
+ "[\xff]",
+ "[\\\xff]",
+ "\\\xff",
+ `(?P<name>a`,
+ `(?P<name>`,
+ `(?P<name`,
+ `(?P<x y>a)`,
+ `(?P<>a)`,
+ `[a-Z]`,
+ `(?i)[a-Z]`,
+ `a{100000}`,
+ `a{100000,}`,
+}
+
+var onlyPerl = []string{
+ `[a-b-c]`,
+ `\Qabc\E`,
+ `\Q*+?{[\E`,
+ `\Q\\E`,
+ `\Q\\\E`,
+ `\Q\\\\E`,
+ `\Q\\\\\E`,
+ `(?:a)`,
+ `(?P<name>a)`,
+}
+
+var onlyPOSIX = []string{
+ "a++",
+ "a**",
+ "a?*",
+ "a+*",
+ "a{1}*",
+ ".{1}{2}.{3}",
+}
+
+// TestParseInvalidRegexps checks three lists of expressions against both
+// the Perl and POSIX flag sets: invalidRegexps must fail under both,
+// onlyPerl must parse under Perl and fail under POSIX, and onlyPOSIX
+// must fail under Perl and parse under POSIX.
+func TestParseInvalidRegexps(t *testing.T) {
+ // Rejected by both syntaxes.
+ for _, regexp := range invalidRegexps {
+ if re, err := Parse(regexp, Perl); err == nil {
+ t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
+ }
+ if re, err := Parse(regexp, POSIX); err == nil {
+ t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
+ }
+ }
+ // Accepted by Perl, rejected by POSIX.
+ for _, regexp := range onlyPerl {
+ if _, err := Parse(regexp, Perl); err != nil {
+ t.Errorf("Parse(%#q, Perl): %v", regexp, err)
+ }
+ if re, err := Parse(regexp, POSIX); err == nil {
+ t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
+ }
+ }
+ // Rejected by Perl, accepted by POSIX.
+ for _, regexp := range onlyPOSIX {
+ if re, err := Parse(regexp, Perl); err == nil {
+ t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
+ }
+ if _, err := Parse(regexp, POSIX); err != nil {
+ t.Errorf("Parse(%#q, POSIX): %v", regexp, err)
+ }
+ }
+}
+
+// TestToStringEquivalentParse checks that String round-trips: for every
+// entry in parseTests, re.String() must either be the original expression
+// or parse (with testFlags) to a tree with the same dump, and printing
+// that second tree must give the same string again.
+func TestToStringEquivalentParse(t *testing.T) {
+	for _, tt := range parseTests {
+		re, err := Parse(tt.Regexp, testFlags)
+		if err != nil {
+			t.Errorf("Parse(%#q): %v", tt.Regexp, err)
+			continue
+		}
+		d := dump(re)
+		if d != tt.Dump {
+			t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
+			continue
+		}
+
+		s := re.String()
+		if s == tt.Regexp {
+			continue
+		}
+
+		// If ToString didn't return the original regexp,
+		// it must have found one with fewer parens.
+		// Unfortunately we can't check the length here, because
+		// ToString produces "\\{" for a literal brace,
+		// but "{" is a shorter equivalent in some contexts.
+		nre, err := Parse(s, testFlags)
+		if err != nil {
+			// Report the printed form s; passing t here would format
+			// the *testing.T instead of the string.
+			t.Errorf("Parse(%#q.String() = %#q): %v", tt.Regexp, s, err)
+			continue
+		}
+		nd := dump(nre)
+		if d != nd {
+			t.Errorf("Parse(%#q) -> %#q; %#q vs %#q", tt.Regexp, s, d, nd)
+		}
+
+		ns := nre.String()
+		if s != ns {
+			t.Errorf("Parse(%#q) -> %#q -> %#q", tt.Regexp, s, ns)
+		}
+	}
+}
diff --git a/libgo/go/regexp/syntax/perl_groups.go b/libgo/go/regexp/syntax/perl_groups.go
new file mode 100644
index 00000000000..05b392c40d8
--- /dev/null
+++ b/libgo/go/regexp/syntax/perl_groups.go
@@ -0,0 +1,130 @@
+// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
+// make_perl_groups.pl >perl_groups.go
+
+package syntax
+
+var code1 = []int{ /* \d */
+ 0x30, 0x39,
+}
+
+var code2 = []int{ /* \s */
+ 0x9, 0xa,
+ 0xc, 0xd,
+ 0x20, 0x20,
+}
+
+var code3 = []int{ /* \w */
+ 0x30, 0x39,
+ 0x41, 0x5a,
+ 0x5f, 0x5f,
+ 0x61, 0x7a,
+}
+
+var perlGroup = map[string]charGroup{
+ `\d`: {+1, code1},
+ `\D`: {-1, code1},
+ `\s`: {+1, code2},
+ `\S`: {-1, code2},
+ `\w`: {+1, code3},
+ `\W`: {-1, code3},
+}
+var code4 = []int{ /* [:alnum:] */
+ 0x30, 0x39,
+ 0x41, 0x5a,
+ 0x61, 0x7a,
+}
+
+var code5 = []int{ /* [:alpha:] */
+ 0x41, 0x5a,
+ 0x61, 0x7a,
+}
+
+var code6 = []int{ /* [:ascii:] */
+ 0x0, 0x7f,
+}
+
+var code7 = []int{ /* [:blank:] */
+ 0x9, 0x9,
+ 0x20, 0x20,
+}
+
+var code8 = []int{ /* [:cntrl:] */
+ 0x0, 0x1f,
+ 0x7f, 0x7f,
+}
+
+var code9 = []int{ /* [:digit:] */
+ 0x30, 0x39,
+}
+
+var code10 = []int{ /* [:graph:] */
+ 0x21, 0x7e,
+}
+
+var code11 = []int{ /* [:lower:] */
+ 0x61, 0x7a,
+}
+
+var code12 = []int{ /* [:print:] */
+ 0x20, 0x7e,
+}
+
+var code13 = []int{ /* [:punct:] */
+ 0x21, 0x2f,
+ 0x3a, 0x40,
+ 0x5b, 0x60,
+ 0x7b, 0x7e,
+}
+
+var code14 = []int{ /* [:space:] */
+ 0x9, 0xd,
+ 0x20, 0x20,
+}
+
+var code15 = []int{ /* [:upper:] */
+ 0x41, 0x5a,
+}
+
+var code16 = []int{ /* [:word:] */
+ 0x30, 0x39,
+ 0x41, 0x5a,
+ 0x5f, 0x5f,
+ 0x61, 0x7a,
+}
+
+var code17 = []int{ /* [:xdigit:] */
+ 0x30, 0x39,
+ 0x41, 0x46,
+ 0x61, 0x66,
+}
+
+var posixGroup = map[string]charGroup{
+ `[:alnum:]`: {+1, code4},
+ `[:^alnum:]`: {-1, code4},
+ `[:alpha:]`: {+1, code5},
+ `[:^alpha:]`: {-1, code5},
+ `[:ascii:]`: {+1, code6},
+ `[:^ascii:]`: {-1, code6},
+ `[:blank:]`: {+1, code7},
+ `[:^blank:]`: {-1, code7},
+ `[:cntrl:]`: {+1, code8},
+ `[:^cntrl:]`: {-1, code8},
+ `[:digit:]`: {+1, code9},
+ `[:^digit:]`: {-1, code9},
+ `[:graph:]`: {+1, code10},
+ `[:^graph:]`: {-1, code10},
+ `[:lower:]`: {+1, code11},
+ `[:^lower:]`: {-1, code11},
+ `[:print:]`: {+1, code12},
+ `[:^print:]`: {-1, code12},
+ `[:punct:]`: {+1, code13},
+ `[:^punct:]`: {-1, code13},
+ `[:space:]`: {+1, code14},
+ `[:^space:]`: {-1, code14},
+ `[:upper:]`: {+1, code15},
+ `[:^upper:]`: {-1, code15},
+ `[:word:]`: {+1, code16},
+ `[:^word:]`: {-1, code16},
+ `[:xdigit:]`: {+1, code17},
+ `[:^xdigit:]`: {-1, code17},
+}
diff --git a/libgo/go/regexp/syntax/prog.go b/libgo/go/regexp/syntax/prog.go
new file mode 100644
index 00000000000..ced45da077b
--- /dev/null
+++ b/libgo/go/regexp/syntax/prog.go
@@ -0,0 +1,306 @@
+package syntax
+
+import (
+ "bytes"
+ "strconv"
+ "unicode"
+)
+
+// Compiled program.
+// May not belong in this package, but convenient for now.
+
+// A Prog is a compiled regular expression program.
+type Prog struct {
+ Inst []Inst
+ Start int // index of start instruction
+ NumCap int // number of InstCapture insts in re
+}
+
+// An InstOp is an instruction opcode.
+type InstOp uint8
+
+const (
+ InstAlt InstOp = iota
+ InstAltMatch
+ InstCapture
+ InstEmptyWidth
+ InstMatch
+ InstFail
+ InstNop
+ InstRune
+ InstRune1
+ InstRuneAny
+ InstRuneAnyNotNL
+)
+
+// An EmptyOp specifies a kind or mixture of zero-width assertions.
+type EmptyOp uint8
+
+const (
+ EmptyBeginLine EmptyOp = 1 << iota
+ EmptyEndLine
+ EmptyBeginText
+ EmptyEndText
+ EmptyWordBoundary
+ EmptyNoWordBoundary
+)
+
+// EmptyOpContext returns the zero-width assertions
+// satisfied at the position between the runes r1 and r2.
+// Passing r1 == -1 indicates that the position is
+// at the beginning of the text.
+// Passing r2 == -1 indicates that the position is
+// at the end of the text.
+// Any negative r1 or r2 is treated the same way as -1.
+func EmptyOpContext(r1, r2 int) EmptyOp {
+	// Exactly one of the two word-boundary assertions always holds.
+	op := EmptyNoWordBoundary
+	if IsWordChar(r1) != IsWordChar(r2) {
+		op = EmptyWordBoundary
+	}
+
+	// What precedes the position decides the "begin" assertions.
+	switch {
+	case r1 < 0:
+		op |= EmptyBeginText | EmptyBeginLine
+	case r1 == '\n':
+		op |= EmptyBeginLine
+	}
+
+	// What follows the position decides the "end" assertions.
+	switch {
+	case r2 < 0:
+		op |= EmptyEndText | EmptyEndLine
+	case r2 == '\n':
+		op |= EmptyEndLine
+	}
+	return op
+}
+
+// IsWordChar reports whether r is considered a ``word character''
+// during the evaluation of the \b and \B zero-width assertions.
+// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
+func IsWordChar(r int) bool {
+	switch {
+	case 'A' <= r && r <= 'Z',
+		'a' <= r && r <= 'z',
+		'0' <= r && r <= '9',
+		r == '_':
+		return true
+	}
+	return false
+}
+
+// An Inst is a single instruction in a regular expression program.
+type Inst struct {
+ Op InstOp
+ Out uint32 // all but InstMatch, InstFail
+ Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
+ // Rune is the operand of the rune-matching instructions. MatchRune
+ // treats a one-element slice as a single literal rune (folded when
+ // Flags(Arg)&FoldCase is set) and any other length as lo, hi pairs.
+ Rune []int
+}
+
+// String returns the program listing written by dumpProg,
+// one instruction per line.
+func (p *Prog) String() string {
+ var b bytes.Buffer
+ dumpProg(&b, p)
+ return b.String()
+}
+
+// skipNop follows any no-op or capturing instructions starting at pc
+// and returns the first instruction that is neither.
+func (p *Prog) skipNop(pc uint32) *Inst {
+	inst := &p.Inst[pc]
+	for inst.Op == InstNop || inst.Op == InstCapture {
+		inst = &p.Inst[inst.Out]
+	}
+	return inst
+}
+
+// op returns i.Op, except that the specialised rune opcodes
+// (InstRune1, InstRuneAny, InstRuneAnyNotNL) are all reported as InstRune.
+func (i *Inst) op() InstOp {
+	if i.Op == InstRune1 || i.Op == InstRuneAny || i.Op == InstRuneAnyNotNL {
+		return InstRune
+	}
+	return i.Op
+}
+
+// Prefix returns a literal string that all matches for the
+// regexp must start with. Complete is true if the prefix
+// is the entire match, that is, if the instruction following the
+// prefix is InstMatch.
+func (p *Prog) Prefix() (prefix string, complete bool) {
+	// singleRune reports whether in matches exactly one listed rune.
+	singleRune := func(in *Inst) bool {
+		return in.op() == InstRune && len(in.Rune) == 1
+	}
+
+	inst := p.skipNop(uint32(p.Start))
+	if !singleRune(inst) {
+		// Empty prefix: return before any buffer is created.
+		return "", inst.Op == InstMatch
+	}
+
+	// Collect runes until reaching an instruction that is not a
+	// single-rune match or that is case-folded.
+	var lit bytes.Buffer
+	for ; singleRune(inst) && Flags(inst.Arg)&FoldCase == 0; inst = p.skipNop(inst.Out) {
+		lit.WriteRune(inst.Rune[0])
+	}
+	return lit.String(), inst.Op == InstMatch
+}
+
+// StartCond returns the leading empty-width conditions that must
+// be true in any match. It returns ^EmptyOp(0) if no matches are possible.
+func (p *Prog) StartCond() EmptyOp {
+	var cond EmptyOp
+	inst := &p.Inst[uint32(p.Start)]
+	for {
+		op := inst.Op
+		if op == InstFail {
+			return ^EmptyOp(0)
+		}
+		if op == InstEmptyWidth {
+			cond |= EmptyOp(inst.Arg)
+		} else if op != InstCapture && op != InstNop {
+			// Anything other than an assertion, capture or no-op
+			// ends the leading run.
+			break
+		}
+		inst = &p.Inst[inst.Out]
+	}
+	return cond
+}
+
+// MatchRune returns true if the instruction matches (and consumes) r.
+// It should only be called when i.Op == InstRune.
+//
+// A one-element i.Rune is a literal rune, compared directly and, when
+// Flags(i.Arg)&FoldCase is set, against its case-folded forms as well.
+// Any other i.Rune is read as a list of lo, hi pairs; the search below
+// relies on those pairs being in increasing order.
+func (i *Inst) MatchRune(r int) bool {
+ rune := i.Rune
+
+ // Special case: single-rune slice is from literal string, not char class.
+ if len(rune) == 1 {
+ r0 := rune[0]
+ if r == r0 {
+ return true
+ }
+ if Flags(i.Arg)&FoldCase != 0 {
+ // Walk the SimpleFold orbit of r0 until it comes back to r0.
+ for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
+ if r == r1 {
+ return true
+ }
+ }
+ }
+ return false
+ }
+
+ // Peek at the first few pairs.
+ // Should handle ASCII well.
+ // (j <= 8 limits this linear scan to the first five pairs.)
+ for j := 0; j < len(rune) && j <= 8; j += 2 {
+ if r < rune[j] {
+ // r is below this range's lo, so no later range can hold it.
+ return false
+ }
+ if r <= rune[j+1] {
+ return true
+ }
+ }
+
+ // Otherwise binary search.
+ // lo, hi and m count pairs, not slice indexes: pair m is rune[2*m], rune[2*m+1].
+ lo := 0
+ hi := len(rune) / 2
+ for lo < hi {
+ m := lo + (hi-lo)/2
+ if c := rune[2*m]; c <= r {
+ if r <= rune[2*m+1] {
+ return true
+ }
+ lo = m + 1
+ } else {
+ hi = m
+ }
+ }
+ return false
+}
+
+// wordRune reports whether rune is an ASCII word character, that is,
+// one of [A-Za-z0-9_], as per re2's Prog::IsWordChar.
+// Since we act on runes, it would be easy to support Unicode here.
+func wordRune(rune int) bool {
+	switch {
+	case rune == '_',
+		'A' <= rune && rune <= 'Z',
+		'a' <= rune && rune <= 'z',
+		'0' <= rune && rune <= '9':
+		return true
+	}
+	return false
+}
+
+// MatchEmptyWidth returns true if the instruction matches
+// an empty string between the runes before and after.
+// It should only be called when i.Op == InstEmptyWidth.
+// A value of -1 for before or after stands for the beginning or end of
+// the text. i.Arg must be exactly one EmptyOp value; anything else panics.
+func (i *Inst) MatchEmptyWidth(before int, after int) bool {
+	switch op := EmptyOp(i.Arg); op {
+	case EmptyBeginText:
+		return before == -1
+	case EmptyEndText:
+		return after == -1
+	case EmptyBeginLine:
+		return before == -1 || before == '\n'
+	case EmptyEndLine:
+		return after == -1 || after == '\n'
+	case EmptyWordBoundary, EmptyNoWordBoundary:
+		atBoundary := wordRune(before) != wordRune(after)
+		return atBoundary == (op == EmptyWordBoundary)
+	}
+	panic("unknown empty width arg")
+}
+
+// String returns the single-line rendering of the instruction
+// written by dumpInst.
+func (i *Inst) String() string {
+ var b bytes.Buffer
+ dumpInst(&b, i)
+ return b.String()
+}
+
+// bw writes each of args to b, in order.
+func bw(b *bytes.Buffer, args ...string) {
+	for k := range args {
+		b.WriteString(args[k])
+	}
+}
+
+// dumpProg writes a listing of p to b, one instruction per line.
+// Each line is the instruction index left-padded with spaces to at least
+// three columns, a "*" if it is the start instruction, a tab, the
+// dumpInst rendering of the instruction, and a newline.
+func dumpProg(b *bytes.Buffer, p *Prog) {
+	const pcWidth = 3 // minimum width of the index column
+	for j := range p.Inst {
+		i := &p.Inst[j]
+		pc := strconv.Itoa(j)
+		// Pad byte by byte rather than slicing a literal run of spaces:
+		// slicing a too-short literal panics for multi-digit indexes.
+		for n := len(pc); n < pcWidth; n++ {
+			b.WriteByte(' ')
+		}
+		if j == p.Start {
+			pc += "*"
+		}
+		bw(b, pc, "\t")
+		dumpInst(b, i)
+		bw(b, "\n")
+	}
+}
+
+// u32 formats i in decimal.
+func u32(i uint32) string {
+ return strconv.Uitoa64(uint64(i))
+}
+
+// dumpInst writes a one-line rendering of instruction i to b: the
+// opcode's mnemonic, followed by its operands and its successor(s)
+// where the opcode has them. No trailing newline is written, and
+// nothing is written for an opcode not listed here.
+func dumpInst(b *bytes.Buffer, i *Inst) {
+	switch i.Op {
+	case InstAlt:
+		bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
+	case InstAltMatch:
+		bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
+	case InstCapture:
+		bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
+	case InstEmptyWidth:
+		bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
+	case InstMatch:
+		bw(b, "match")
+	case InstFail:
+		bw(b, "fail")
+	case InstNop:
+		bw(b, "nop -> ", u32(i.Out))
+	case InstRune:
+		if i.Rune == nil {
+			// Shouldn't happen. Write the marker in place of the quoted
+			// runes; writing both would give `rune <nil>rune ""`.
+			bw(b, "rune <nil>")
+		} else {
+			bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)))
+		}
+		if Flags(i.Arg)&FoldCase != 0 {
+			bw(b, "/i")
+		}
+		bw(b, " -> ", u32(i.Out))
+	case InstRune1:
+		bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
+	case InstRuneAny:
+		bw(b, "any -> ", u32(i.Out))
+	case InstRuneAnyNotNL:
+		bw(b, "anynotnl -> ", u32(i.Out))
+	}
+}
diff --git a/libgo/go/regexp/syntax/prog_test.go b/libgo/go/regexp/syntax/prog_test.go
new file mode 100644
index 00000000000..70959f6790a
--- /dev/null
+++ b/libgo/go/regexp/syntax/prog_test.go
@@ -0,0 +1,102 @@
+package syntax_test
+
+import (
+ . "regexp/syntax"
+ "testing"
+)
+
+// compileTests pairs a regular expression with the exact listing that
+// Prog.String is expected to return for its compiled program.
+// Each listing line is the instruction index padded to three columns,
+// "*" on the start instruction, one tab, and the instruction text;
+// the whitespace inside these raw strings is significant.
+var compileTests = []struct {
+	Regexp string
+	Prog   string
+}{
+	{"a", `  0	fail
+  1*	rune1 "a" -> 2
+  2	match
+`},
+	{"[A-M][n-z]", `  0	fail
+  1*	rune "AM" -> 2
+  2	rune "nz" -> 3
+  3	match
+`},
+	{"", `  0	fail
+  1*	nop -> 2
+  2	match
+`},
+	{"a?", `  0	fail
+  1	rune1 "a" -> 3
+  2*	alt -> 1, 3
+  3	match
+`},
+	{"a??", `  0	fail
+  1	rune1 "a" -> 3
+  2*	alt -> 3, 1
+  3	match
+`},
+	{"a+", `  0	fail
+  1*	rune1 "a" -> 2
+  2	alt -> 1, 3
+  3	match
+`},
+	{"a+?", `  0	fail
+  1*	rune1 "a" -> 2
+  2	alt -> 3, 1
+  3	match
+`},
+	{"a*", `  0	fail
+  1	rune1 "a" -> 2
+  2*	alt -> 1, 3
+  3	match
+`},
+	{"a*?", `  0	fail
+  1	rune1 "a" -> 2
+  2*	alt -> 3, 1
+  3	match
+`},
+	{"a+b+", `  0	fail
+  1*	rune1 "a" -> 2
+  2	alt -> 1, 3
+  3	rune1 "b" -> 4
+  4	alt -> 3, 5
+  5	match
+`},
+	{"(a+)(b+)", `  0	fail
+  1*	cap 2 -> 2
+  2	rune1 "a" -> 3
+  3	alt -> 2, 4
+  4	cap 3 -> 5
+  5	cap 4 -> 6
+  6	rune1 "b" -> 7
+  7	alt -> 6, 8
+  8	cap 5 -> 9
+  9	match
+`},
+	{"a+|b+", `  0	fail
+  1	rune1 "a" -> 2
+  2	alt -> 1, 6
+  3	rune1 "b" -> 4
+  4	alt -> 3, 6
+  5*	alt -> 1, 3
+  6	match
+`},
+	{"A[Aa]", `  0	fail
+  1*	rune1 "A" -> 2
+  2	rune "A"/i -> 3
+  3	match
+`},
+	{"(?:(?:^).)", `  0	fail
+  1*	empty 4 -> 2
+  2	anynotnl -> 3
+  3	match
+`},
+}
+
+// TestCompile parses each compileTests expression with the Perl flags,
+// compiles it, and compares the program listing with the expected one.
+// Parse and Compile errors are reported instead of being discarded, so
+// a failure does not turn into a nil dereference in p.String.
+func TestCompile(t *testing.T) {
+	for _, tt := range compileTests {
+		re, err := Parse(tt.Regexp, Perl)
+		if err != nil {
+			t.Errorf("Parse(%#q): %v", tt.Regexp, err)
+			continue
+		}
+		p, err := Compile(re)
+		if err != nil {
+			t.Errorf("Compile(%#q): %v", tt.Regexp, err)
+			continue
+		}
+		s := p.String()
+		if s != tt.Prog {
+			t.Errorf("compiled %#q:\n--- have\n%s---\n--- want\n%s---", tt.Regexp, s, tt.Prog)
+		}
+	}
+}
diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go
new file mode 100644
index 00000000000..033848df28a
--- /dev/null
+++ b/libgo/go/regexp/syntax/regexp.go
@@ -0,0 +1,305 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package syntax parses regular expressions into syntax trees.
+// WORK IN PROGRESS.
+package syntax
+
+// Note to implementers:
+// In this package, re is always a *Regexp and r is always a rune.
+
+import (
+ "bytes"
+ "strconv"
+ "strings"
+ "unicode"
+)
+
+// A Regexp is a node in a regular expression syntax tree.
+type Regexp struct {
+ Op Op // operator
+ Flags Flags
+ Sub []*Regexp // subexpressions, if any
+ Sub0 [1]*Regexp // storage for short Sub
+ Rune []int // matched runes, for OpLiteral, OpCharClass
+ Rune0 [2]int // storage for short Rune
+ Min, Max int // min, max for OpRepeat
+ Cap int // capturing index, for OpCapture
+ Name string // capturing name, for OpCapture
+}
+
+// An Op is a single regular expression operator.
+type Op uint8
+
+// Operators are listed in precedence order, tightest binding to weakest.
+// Character class operators are listed simplest to most complex
+// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
+
+const (
+ OpNoMatch Op = 1 + iota // matches no strings
+ OpEmptyMatch // matches empty string
+ OpLiteral // matches Runes sequence
+ OpCharClass // matches Runes interpreted as range pair list
+ OpAnyCharNotNL // matches any character except newline
+ OpAnyChar // matches any character
+ OpBeginLine // matches empty string at beginning of line
+ OpEndLine // matches empty string at end of line
+ OpBeginText // matches empty string at beginning of text
+ OpEndText // matches empty string at end of text
+ OpWordBoundary // matches word boundary `\b`
+ OpNoWordBoundary // matches word non-boundary `\B`
+ OpCapture // capturing subexpression with index Cap, optional name Name
+ OpStar // matches Sub[0] zero or more times
+ OpPlus // matches Sub[0] one or more times
+ OpQuest // matches Sub[0] zero or one times
+ OpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
+ OpConcat // matches concatenation of Subs
+ OpAlternate // matches alternation of Subs
+)
+
+const opPseudo Op = 128 // where pseudo-ops start
+
+// Equal returns true if x and y have identical structure.
+// Two nil pointers are equal; nil never equals a non-nil node.
+// Nodes with different operators are unequal. Beyond that, only the
+// fields that matter for the operator are compared; operators without
+// a case below compare equal on the operator alone.
+func (x *Regexp) Equal(y *Regexp) bool {
+	if x == nil || y == nil {
+		return x == y
+	}
+	if x.Op != y.Op {
+		return false
+	}
+
+	sameGreed := x.Flags&NonGreedy == y.Flags&NonGreedy
+	switch x.Op {
+	case OpEndText:
+		// The parse flags remember whether this is \z or \Z.
+		return x.Flags&WasDollar == y.Flags&WasDollar
+
+	case OpLiteral, OpCharClass:
+		if len(x.Rune) != len(y.Rune) {
+			return false
+		}
+		for i := range x.Rune {
+			if x.Rune[i] != y.Rune[i] {
+				return false
+			}
+		}
+
+	case OpAlternate, OpConcat:
+		if len(x.Sub) != len(y.Sub) {
+			return false
+		}
+		for i := range x.Sub {
+			if !x.Sub[i].Equal(y.Sub[i]) {
+				return false
+			}
+		}
+
+	case OpStar, OpPlus, OpQuest:
+		return sameGreed && x.Sub[0].Equal(y.Sub[0])
+
+	case OpRepeat:
+		return sameGreed && x.Min == y.Min && x.Max == y.Max && x.Sub[0].Equal(y.Sub[0])
+
+	case OpCapture:
+		return x.Cap == y.Cap && x.Name == y.Name && x.Sub[0].Equal(y.Sub[0])
+	}
+	return true
+}
+
+// writeRegexp writes the Perl syntax for the regular expression re to b.
+// Subexpressions are wrapped in a non-capturing group (?:...) where
+// operator precedence requires it. An unrecognised operator is written
+// as an "<invalid op N>" placeholder.
+func writeRegexp(b *bytes.Buffer, re *Regexp) {
+ switch re.Op {
+ default:
+ b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
+ case OpNoMatch:
+ // A negated class covering every rune matches nothing.
+ b.WriteString(`[^\x00-\x{10FFFF}]`)
+ case OpEmptyMatch:
+ b.WriteString(`(?:)`)
+ case OpLiteral:
+ // Case-folded literals are wrapped in (?i:...).
+ if re.Flags&FoldCase != 0 {
+ b.WriteString(`(?i:`)
+ }
+ for _, r := range re.Rune {
+ escape(b, r, false)
+ }
+ if re.Flags&FoldCase != 0 {
+ b.WriteString(`)`)
+ }
+ case OpCharClass:
+ // Rune must hold lo, hi pairs, so an odd length is malformed.
+ if len(re.Rune)%2 != 0 {
+ b.WriteString(`[invalid char class]`)
+ break
+ }
+ b.WriteRune('[')
+ if len(re.Rune) == 0 {
+ // An empty class is written as the negation of everything.
+ b.WriteString(`^\x00-\x{10FFFF}`)
+ } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
+ // Contains 0 and MaxRune. Probably a negated class.
+ // Print the gaps.
+ b.WriteRune('^')
+ // Each gap runs from one past a range's hi to one before the next range's lo.
+ for i := 1; i < len(re.Rune)-1; i += 2 {
+ lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
+ escape(b, lo, lo == '-')
+ if lo != hi {
+ b.WriteRune('-')
+ escape(b, hi, hi == '-')
+ }
+ }
+ } else {
+ // Ordinary class: print each range; a literal '-' endpoint is escaped.
+ for i := 0; i < len(re.Rune); i += 2 {
+ lo, hi := re.Rune[i], re.Rune[i+1]
+ escape(b, lo, lo == '-')
+ if lo != hi {
+ b.WriteRune('-')
+ escape(b, hi, hi == '-')
+ }
+ }
+ }
+ b.WriteRune(']')
+ case OpAnyCharNotNL:
+ b.WriteString(`(?-s:.)`)
+ case OpAnyChar:
+ b.WriteString(`(?s:.)`)
+ case OpBeginLine:
+ b.WriteRune('^')
+ case OpEndLine:
+ b.WriteRune('$')
+ case OpBeginText:
+ b.WriteString(`\A`)
+ case OpEndText:
+ // An end-of-text that was written as '$' is printed as (?-m:$), otherwise as \z.
+ if re.Flags&WasDollar != 0 {
+ b.WriteString(`(?-m:$)`)
+ } else {
+ b.WriteString(`\z`)
+ }
+ case OpWordBoundary:
+ b.WriteString(`\b`)
+ case OpNoWordBoundary:
+ b.WriteString(`\B`)
+ case OpCapture:
+ if re.Name != "" {
+ b.WriteString(`(?P<`)
+ b.WriteString(re.Name)
+ b.WriteRune('>')
+ } else {
+ b.WriteRune('(')
+ }
+ // An empty capture is printed as "()" rather than "((?:))".
+ if re.Sub[0].Op != OpEmptyMatch {
+ writeRegexp(b, re.Sub[0])
+ }
+ b.WriteRune(')')
+ case OpStar, OpPlus, OpQuest, OpRepeat:
+ // Group the operand if it binds more loosely than a capture
+ // or is a multi-rune literal.
+ if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
+ b.WriteString(`(?:`)
+ writeRegexp(b, sub)
+ b.WriteString(`)`)
+ } else {
+ writeRegexp(b, sub)
+ }
+ switch re.Op {
+ case OpStar:
+ b.WriteRune('*')
+ case OpPlus:
+ b.WriteRune('+')
+ case OpQuest:
+ b.WriteRune('?')
+ case OpRepeat:
+ // {min}, {min,} when Max is negative, or {min,max}.
+ b.WriteRune('{')
+ b.WriteString(strconv.Itoa(re.Min))
+ if re.Max != re.Min {
+ b.WriteRune(',')
+ if re.Max >= 0 {
+ b.WriteString(strconv.Itoa(re.Max))
+ }
+ }
+ b.WriteRune('}')
+ }
+ // A trailing '?' marks the repetition as non-greedy.
+ if re.Flags&NonGreedy != 0 {
+ b.WriteRune('?')
+ }
+ case OpConcat:
+ // Only alternations need grouping inside a concatenation.
+ for _, sub := range re.Sub {
+ if sub.Op == OpAlternate {
+ b.WriteString(`(?:`)
+ writeRegexp(b, sub)
+ b.WriteString(`)`)
+ } else {
+ writeRegexp(b, sub)
+ }
+ }
+ case OpAlternate:
+ for i, sub := range re.Sub {
+ if i > 0 {
+ b.WriteRune('|')
+ }
+ writeRegexp(b, sub)
+ }
+ }
+}
+
+// String returns the Perl-syntax text for re, as written by writeRegexp.
+func (re *Regexp) String() string {
+ var b bytes.Buffer
+ writeRegexp(&b, re)
+ return b.String()
+}
+
+const meta = `\.+*?()|[]{}^$`
+
+// escape writes the rune r to b in a form suitable for a regular
+// expression. Printable runes are written as themselves, preceded by a
+// backslash if they appear in meta or if force is set. Non-printable
+// runes use the named escapes \a \f \n \r \t \v where one exists and
+// otherwise a hexadecimal escape: \xNN below 0x100, \x{N...} above.
+func escape(b *bytes.Buffer, r int, force bool) {
+	if unicode.IsPrint(r) {
+		if force || strings.IndexRune(meta, r) >= 0 {
+			b.WriteRune('\\')
+		}
+		b.WriteRune(r)
+		return
+	}
+
+	switch r {
+	case '\a':
+		b.WriteString(`\a`)
+		return
+	case '\f':
+		b.WriteString(`\f`)
+		return
+	case '\n':
+		b.WriteString(`\n`)
+		return
+	case '\r':
+		b.WriteString(`\r`)
+		return
+	case '\t':
+		b.WriteString(`\t`)
+		return
+	case '\v':
+		b.WriteString(`\v`)
+		return
+	}
+
+	digits := strconv.Itob(r, 16)
+	switch {
+	case r >= 0x100:
+		b.WriteString(`\x{` + digits + `}`)
+	case len(digits) == 1:
+		// Pad to two hex digits.
+		b.WriteString(`\x0` + digits)
+	default:
+		b.WriteString(`\x` + digits)
+	}
+}
+
+// MaxCap walks the regexp to find the maximum capture index.
+// It returns 0 if no capture with a positive index is found.
+func (re *Regexp) MaxCap() int {
+	highest := 0
+	if re.Op == OpCapture {
+		highest = re.Cap
+	}
+	for i := range re.Sub {
+		if c := re.Sub[i].MaxCap(); c > highest {
+			highest = c
+		}
+	}
+	return highest
+}
diff --git a/libgo/go/regexp/syntax/simplify.go b/libgo/go/regexp/syntax/simplify.go
new file mode 100644
index 00000000000..72390417bbe
--- /dev/null
+++ b/libgo/go/regexp/syntax/simplify.go
@@ -0,0 +1,151 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// Simplify returns a regexp equivalent to re but without counted repetitions
+// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/.
+// The resulting regexp will execute correctly but its string representation
+// will not produce the same parse tree, because capturing parentheses
+// may have been duplicated or removed. For example, the simplified form
+// for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1.
+// The returned regexp may share structure with or be the original.
+// A nil receiver yields nil. Operators not handled by the switch below
+// are returned unchanged.
+func (re *Regexp) Simplify() *Regexp {
+ if re == nil {
+ return nil
+ }
+ switch re.Op {
+ case OpCapture, OpConcat, OpAlternate:
+ // Simplify children, building new Regexp if children change.
+ // nre stays identical to re (nothing is allocated) until the first
+ // child whose simplified form is a different node.
+ nre := re
+ for i, sub := range re.Sub {
+ nsub := sub.Simplify()
+ if nre == re && nsub != sub {
+ // Start a copy.
+ // The copy's Sub is rebuilt from nre.Sub0[:0], seeded with the
+ // children already visited (all of which were unchanged).
+ nre = new(Regexp)
+ *nre = *re
+ nre.Rune = nil
+ nre.Sub = append(nre.Sub0[:0], re.Sub[:i]...)
+ }
+ if nre != re {
+ nre.Sub = append(nre.Sub, nsub)
+ }
+ }
+ return nre
+
+ case OpStar, OpPlus, OpQuest:
+ // Passing re lets simplify1 hand back re itself when nothing changed.
+ sub := re.Sub[0].Simplify()
+ return simplify1(re.Op, re.Flags, sub, re)
+
+ case OpRepeat:
+ // Special special case: x{0} matches the empty string
+ // and doesn't even need to consider x.
+ if re.Min == 0 && re.Max == 0 {
+ return &Regexp{Op: OpEmptyMatch}
+ }
+
+ // The fun begins.
+ sub := re.Sub[0].Simplify()
+
+ // x{n,} means at least n matches of x.
+ // (re.Max == -1 marks the missing upper bound.)
+ if re.Max == -1 {
+ // Special case: x{0,} is x*.
+ if re.Min == 0 {
+ return simplify1(OpStar, re.Flags, sub, nil)
+ }
+
+ // Special case: x{1,} is x+.
+ if re.Min == 1 {
+ return simplify1(OpPlus, re.Flags, sub, nil)
+ }
+
+ // General case: x{4,} is xxxx+.
+ // That is, Min-1 plain copies of sub followed by one sub+.
+ nre := &Regexp{Op: OpConcat}
+ nre.Sub = nre.Sub0[:0]
+ for i := 0; i < re.Min-1; i++ {
+ nre.Sub = append(nre.Sub, sub)
+ }
+ nre.Sub = append(nre.Sub, simplify1(OpPlus, re.Flags, sub, nil))
+ return nre
+ }
+
+ // Special case x{0} handled above.
+
+ // Special case: x{1} is just x.
+ if re.Min == 1 && re.Max == 1 {
+ return sub
+ }
+
+ // General case: x{n,m} means n copies of x and m-n copies of x?
+ // The machine will do less work if we nest the final m-n copies,
+ // so that x{2,5} = xx(x(x(x)?)?)?
+
+ // Build leading prefix: xx.
+ // prefix stays nil when re.Min <= 0.
+ var prefix *Regexp
+ if re.Min > 0 {
+ prefix = &Regexp{Op: OpConcat}
+ prefix.Sub = prefix.Sub0[:0]
+ for i := 0; i < re.Min; i++ {
+ prefix.Sub = append(prefix.Sub, sub)
+ }
+ }
+
+ // Build and attach suffix: (x(x(x)?)?)?
+ // Start with the innermost x? and wrap it Max-Min-1 more times.
+ if re.Max > re.Min {
+ suffix := simplify1(OpQuest, re.Flags, sub, nil)
+ for i := re.Min + 1; i < re.Max; i++ {
+ nre2 := &Regexp{Op: OpConcat}
+ nre2.Sub = append(nre2.Sub0[:0], sub, suffix)
+ suffix = simplify1(OpQuest, re.Flags, nre2, nil)
+ }
+ if prefix == nil {
+ return suffix
+ }
+ prefix.Sub = append(prefix.Sub, suffix)
+ }
+ if prefix != nil {
+ return prefix
+ }
+
+ // Some degenerate case like min > max or min < max < 0.
+ // Reached only when no prefix was built (re.Min <= 0) and
+ // re.Max <= re.Min, so no suffix was built either.
+ // Handle as impossible match.
+ return &Regexp{Op: OpNoMatch}
+ }
+
+ return re
+}
+
+// simplify1 is the shared back end of Simplify for the unary
+// operators OpStar, OpPlus, and OpQuest. Given an already-simple
+// operand sub, it returns a simple regexp equivalent to
+//
+//	Regexp{Op: op, Flags: flags, Sub: {sub}}
+//
+// allocating that node only when it has to:
+//
+//   - if sub is the empty match, or sub is already the same operator
+//     with the same NonGreedy setting, sub itself is returned;
+//   - if re is non-nil and already has this operator, this NonGreedy
+//     setting, and sub as its only operand, re itself is returned;
+//   - otherwise a new node is built.
+//
+// It lives outside Simplify because other cases in Simplify generate
+// these unary expressions too, and routing them through here keeps
+// everything they generate simple.
+func simplify1(op Op, flags Flags, sub, re *Regexp) *Regexp {
+	lazy := flags & NonGreedy
+
+	// Repeating the empty string still matches only the empty string,
+	// and applying the same operator twice adds nothing.
+	if sub.Op == OpEmptyMatch || (sub.Op == op && sub.Flags&NonGreedy == lazy) {
+		return sub
+	}
+
+	// Reuse the caller's node when it already has exactly this shape.
+	if re != nil && re.Op == op && re.Flags&NonGreedy == lazy && re.Sub[0] == sub {
+		return re
+	}
+
+	out := &Regexp{Op: op, Flags: flags}
+	out.Sub = append(out.Sub0[:0], sub)
+	return out
+}
diff --git a/libgo/go/regexp/syntax/simplify_test.go b/libgo/go/regexp/syntax/simplify_test.go
new file mode 100644
index 00000000000..92a9d3d6da3
--- /dev/null
+++ b/libgo/go/regexp/syntax/simplify_test.go
@@ -0,0 +1,152 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax_test
+
+import . "regexp/syntax"
+import "testing"
+
+// simplifyTests is the table driving TestSimplify. Regexp is the
+// pattern to parse; Simple is the string form expected from the
+// parsed pattern after Simplify.
+var simplifyTests = []struct {
+ Regexp string
+ Simple string
+}{
+ // Already-simple constructs
+ {`a`, `a`},
+ {`ab`, `ab`},
+ {`a|b`, `[a-b]`},
+ {`ab|cd`, `ab|cd`},
+ {`(ab)*`, `(ab)*`},
+ {`(ab)+`, `(ab)+`},
+ {`(ab)?`, `(ab)?`},
+ {`.`, `(?s:.)`},
+ {`^`, `^`},
+ {`$`, `$`},
+ {`[ac]`, `[ac]`},
+ {`[^ac]`, `[^ac]`},
+
+ // Posix character classes
+ {`[[:alnum:]]`, `[0-9A-Za-z]`},
+ {`[[:alpha:]]`, `[A-Za-z]`},
+ {`[[:blank:]]`, `[\t ]`},
+ {`[[:cntrl:]]`, `[\x00-\x1f\x7f]`},
+ {`[[:digit:]]`, `[0-9]`},
+ {`[[:graph:]]`, `[!-~]`},
+ {`[[:lower:]]`, `[a-z]`},
+ {`[[:print:]]`, `[ -~]`},
+ {`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"},
+ {`[[:space:]]`, `[\t-\r ]`},
+ {`[[:upper:]]`, `[A-Z]`},
+ {`[[:xdigit:]]`, `[0-9A-Fa-f]`},
+
+ // Perl character classes
+ {`\d`, `[0-9]`},
+ {`\s`, `[\t-\n\f-\r ]`},
+ {`\w`, `[0-9A-Z_a-z]`},
+ {`\D`, `[^0-9]`},
+ {`\S`, `[^\t-\n\f-\r ]`},
+ {`\W`, `[^0-9A-Z_a-z]`},
+ {`[\d]`, `[0-9]`},
+ {`[\s]`, `[\t-\n\f-\r ]`},
+ {`[\w]`, `[0-9A-Z_a-z]`},
+ {`[\D]`, `[^0-9]`},
+ {`[\S]`, `[^\t-\n\f-\r ]`},
+ {`[\W]`, `[^0-9A-Z_a-z]`},
+
+ // Posix repetitions
+ {`a{1}`, `a`},
+ {`a{2}`, `aa`},
+ {`a{5}`, `aaaaa`},
+ {`a{0,1}`, `a?`},
+ // The next six are illegible because Simplify inserts (?:)
+ // parens instead of () parens to avoid creating extra
+ // captured subexpressions. The comments show a version with fewer parens.
+ {`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)?
+ {`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)?
+ {`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)?
+ {`a{0,2}`, `(?:aa?)?`}, // (aa?)?
+ {`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`}, // (a(a(aa?)?)?)?
+ {`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`}, // aa(a(a(aa?)?)?)?
+ {`a{0,}`, `a*`},
+ {`a{1,}`, `a+`},
+ {`a{2,}`, `aa+`},
+ {`a{5,}`, `aaaaa+`},
+
+ // Test that operators simplify their arguments.
+ {`(?:a{1,}){1,}`, `a+`},
+ {`(a{1,}b{1,})`, `(a+b+)`},
+ {`a{1,}|b{1,}`, `a+|b+`},
+ {`(?:a{1,})*`, `(?:a+)*`},
+ {`(?:a{1,})+`, `a+`},
+ {`(?:a{1,})?`, `(?:a+)?`},
+ {``, `(?:)`},
+ {`a{0}`, `(?:)`},
+
+ // Character class simplification
+ {`[ab]`, `[a-b]`},
+ {`[a-za-za-z]`, `[a-z]`},
+ {`[A-Za-zA-Za-z]`, `[A-Za-z]`},
+ {`[ABCDEFGH]`, `[A-H]`},
+ {`[AB-CD-EF-GH]`, `[A-H]`},
+ {`[W-ZP-XE-R]`, `[E-Z]`},
+ {`[a-ee-gg-m]`, `[a-m]`},
+ {`[a-ea-ha-m]`, `[a-m]`},
+ {`[a-ma-ha-e]`, `[a-m]`},
+ {`[a-zA-Z0-9 -~]`, `[ -~]`},
+
+ // Empty character classes
+ {`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`},
+
+ // Full character classes
+ {`[[:cntrl:][:^cntrl:]]`, `(?s:.)`},
+
+ // Unicode case folding.
+ {`(?i)A`, `(?i:A)`},
+ {`(?i)a`, `(?i:A)`},
+ {`(?i)[A]`, `(?i:A)`},
+ {`(?i)[a]`, `(?i:A)`},
+ {`(?i)K`, `(?i:K)`},
+ {`(?i)k`, `(?i:K)`},
+ {`(?i)\x{212a}`, "(?i:K)"},
+ {`(?i)[K]`, "[Kk\u212A]"},
+ {`(?i)[k]`, "[Kk\u212A]"},
+ {`(?i)[\x{212a}]`, "[Kk\u212A]"},
+ {`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"},
+ {`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"},
+ {`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`},
+
+ // Empty string as a regular expression.
+ // The empty string must be preserved inside parens in order
+ // to make submatches work right, so these tests are less
+ // interesting than they might otherwise be. String inserts
+ // explicit (?:) in place of non-parenthesized empty strings,
+ // to make them easier to spot for other parsers.
+ {`(a|b|)`, `([a-b]|(?:))`},
+ {`(|)`, `()`},
+ {`a()`, `a()`},
+ {`(()|())`, `(()|())`},
+ {`(a|)`, `(a|(?:))`},
+ {`ab()cd()`, `ab()cd()`},
+ {`()`, `()`},
+ {`()*`, `()*`},
+ {`()+`, `()+`},
+ {`()?`, `()?`},
+ {`(){0}`, `(?:)`},
+ {`(){1}`, `()`},
+ {`(){1,}`, `()+`},
+ {`(){0,2}`, `(?:()()?)?`},
+}
+
+// TestSimplify parses every pattern in simplifyTests, simplifies it,
+// and compares the simplified regexp's string form with the expected
+// text. A pattern that fails to parse is reported and skipped.
+func TestSimplify(t *testing.T) {
+	for i := range simplifyTests {
+		tc := simplifyTests[i]
+		parsed, err := Parse(tc.Regexp, MatchNL|Perl&^OneLine)
+		if err != nil {
+			t.Errorf("Parse(%#q) = error %v", tc.Regexp, err)
+			continue
+		}
+		if got := parsed.Simplify().String(); got != tc.Simple {
+			t.Errorf("Simplify(%#q) = %#q, want %#q", tc.Regexp, got, tc.Simple)
+		}
+	}
+}
diff --git a/libgo/go/regexp/testdata/README b/libgo/go/regexp/testdata/README
new file mode 100644
index 00000000000..b1b301be83f
--- /dev/null
+++ b/libgo/go/regexp/testdata/README
@@ -0,0 +1,23 @@
+AT&T POSIX Test Files
+See testregex.c for copyright + license.
+
+testregex.c http://www2.research.att.com/~gsf/testregex/testregex.c
+basic.dat http://www2.research.att.com/~gsf/testregex/basic.dat
+nullsubexpr.dat http://www2.research.att.com/~gsf/testregex/nullsubexpr.dat
+repetition.dat http://www2.research.att.com/~gsf/testregex/repetition.dat
+
+The test data has been edited to reflect RE2/Go differences:
+ * In a star of a possibly empty match like (a*)* matching x,
+ the no match case runs the starred subexpression zero times,
+ not once. This is consistent with (a*)* matching a, which
+ runs the starred subexpression one time, not twice.
+ * The submatch choice is first match, not the POSIX rule.
+
+Such changes are marked with 'RE2/Go'.
+
+
+RE2 Test Files
+
+re2-exhaustive.txt.bz2 and re2-search.txt are built by running
+'make log' in the RE2 distribution. http://code.google.com/p/re2/.
+The exhaustive file is compressed because it is huge.
diff --git a/libgo/go/regexp/testdata/basic.dat b/libgo/go/regexp/testdata/basic.dat
new file mode 100644
index 00000000000..7859290ba1d
--- /dev/null
+++ b/libgo/go/regexp/testdata/basic.dat
@@ -0,0 +1,221 @@
+NOTE all standard compliant implementations should pass these : 2002-05-31
+
+BE abracadabra$ abracadabracadabra (7,18)
+BE a...b abababbb (2,7)
+BE XXXXXX ..XXXXXX (2,8)
+E \) () (1,2)
+BE a] a]a (0,2)
+B } } (0,1)
+E \} } (0,1)
+BE \] ] (0,1)
+B ] ] (0,1)
+E ] ] (0,1)
+B { { (0,1)
+B } } (0,1)
+BE ^a ax (0,1)
+BE \^a a^a (1,3)
+BE a\^ a^ (0,2)
+BE a$ aa (1,2)
+BE a\$ a$ (0,2)
+BE ^$ NULL (0,0)
+E $^ NULL (0,0)
+E a($) aa (1,2)(2,2)
+E a*(^a) aa (0,1)(0,1)
+E (..)*(...)* a (0,0)
+E (..)*(...)* abcd (0,4)(2,4)
+E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
+E (ab)c|abc abc (0,3)(0,2)
+E a{0}b ab (1,2)
+E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
+E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
+E a{9876543210} NULL BADBR
+E ((a|a)|a) a (0,1)(0,1)(0,1)
+E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
+E a*(a.|aa) aaaa (0,4)(2,4)
+E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
+E (a|b)?.* b (0,1)(0,1)
+E (a|b)c|a(b|c) ac (0,2)(0,1)
+E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
+E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
+E (a|b)*c|(a|ab)*c xc (1,2)
+E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
+E a?(ab|ba)ab abab (0,4)(0,2)
+E a?(ac{0}b|ba)ab abab (0,4)(0,2)
+E ab|abab abbabab (0,2)
+E aba|bab|bba baaabbbaba (5,8)
+E aba|bab baaabbbaba (6,9)
+E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
+E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
+E ab|a xabc (1,3)
+E ab|a xxabc (2,4)
+Ei (Ab|cD)* aBcD (0,4)(2,4)
+BE [^-] --a (2,3)
+BE [a-]* --a (0,3)
+BE [a-m-]* --amoma-- (0,4)
+E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
+E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
+{E [[:upper:]] A (0,1) [[<element>]] not supported
+E [[:lower:]]+ `az{ (1,3)
+E [[:upper:]]+ @AZ[ (1,3)
+# No collation in Go
+#BE [[-]] [[-]] (2,4)
+#BE [[.NIL.]] NULL ECOLLATE
+#BE [[=aleph=]] NULL ECOLLATE
+}
+BE$ \n \n (0,1)
+BEn$ \n \n (0,1)
+BE$ [^a] \n (0,1)
+BE$ \na \na (0,2)
+E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
+BE xxx xxx (0,3)
+E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
+E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
+E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
+E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
+E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
+E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
+E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
+E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
+E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
+BE$ .* \x01\xff (0,2)
+E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
+L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
+E a*a*a*a*a*b aaaaaaaaab (0,10)
+BE ^ NULL (0,0)
+BE $ NULL (0,0)
+BE ^$ NULL (0,0)
+BE ^a$ a (0,1)
+BE abc abc (0,3)
+BE abc xabcy (1,4)
+BE abc ababc (2,5)
+BE ab*c abc (0,3)
+BE ab*bc abc (0,3)
+BE ab*bc abbc (0,4)
+BE ab*bc abbbbc (0,6)
+E ab+bc abbc (0,4)
+E ab+bc abbbbc (0,6)
+E ab?bc abbc (0,4)
+E ab?bc abc (0,3)
+E ab?c abc (0,3)
+BE ^abc$ abc (0,3)
+BE ^abc abcc (0,3)
+BE abc$ aabc (1,4)
+BE ^ abc (0,0)
+BE $ abc (3,3)
+BE a.c abc (0,3)
+BE a.c axc (0,3)
+BE a.*c axyzc (0,5)
+BE a[bc]d abd (0,3)
+BE a[b-d]e ace (0,3)
+BE a[b-d] aac (1,3)
+BE a[-b] a- (0,2)
+BE a[b-] a- (0,2)
+BE a] a] (0,2)
+BE a[]]b a]b (0,3)
+BE a[^bc]d aed (0,3)
+BE a[^-b]c adc (0,3)
+BE a[^]b]c adc (0,3)
+E ab|cd abc (0,2)
+E ab|cd abcd (0,2)
+E a\(b a(b (0,3)
+E a\(*b ab (0,2)
+E a\(*b a((b (0,4)
+E ((a)) abc (0,1)(0,1)(0,1)
+E (a)b(c) abc (0,3)(0,1)(2,3)
+E a+b+c aabbabc (4,7)
+E a* aaa (0,3)
+#E (a*)* - (0,0)(0,0)
+E (a*)* - (0,0)(?,?) RE2/Go
+E (a*)+ - (0,0)(0,0)
+#E (a*|b)* - (0,0)(0,0)
+E (a*|b)* - (0,0)(?,?) RE2/Go
+E (a+|b)* ab (0,2)(1,2)
+E (a+|b)+ ab (0,2)(1,2)
+E (a+|b)? ab (0,1)(0,1)
+BE [^ab]* cde (0,3)
+#E (^)* - (0,0)(0,0)
+E (^)* - (0,0)(?,?) RE2/Go
+BE a* NULL (0,0)
+E ([abc])*d abbbcd (0,6)(4,5)
+E ([abc])*bcd abcd (0,4)(0,1)
+E a|b|c|d|e e (0,1)
+E (a|b|c|d|e)f ef (0,2)(0,1)
+#E ((a*|b))* - (0,0)(0,0)(0,0)
+E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
+BE abcd*efg abcdefg (0,7)
+BE ab* xabyabbbz (1,3)
+BE ab* xayabbbz (1,2)
+E (ab|cd)e abcde (2,5)(2,4)
+BE [abhgefdc]ij hij (0,3)
+E (a|b)c*d abcd (1,4)(1,2)
+E (ab|ab*)bc abc (0,3)(0,1)
+E a([bc]*)c* abc (0,3)(1,3)
+E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
+E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
+E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
+E a[bcd]*dcdcde adcdcde (0,7)
+E (ab|a)b*c abc (0,3)(0,2)
+E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
+BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
+E ^a(bc+|b[eh])g|.h$ abh (1,3)
+E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
+E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
+E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
+E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
+BE multiple words multiple words yeah (0,14)
+E (.*)c(.*) abcde (0,5)(0,2)(3,5)
+BE abcd abcd (0,4)
+E a(bc)d abcd (0,4)(1,3)
+E a[-]?c ac (0,3)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
+E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
+E a+(b|c)*d+ aabcdd (0,6)(3,4)
+E ^.+$ vivi (0,4)
+E ^(.+)$ vivi (0,4)(0,4)
+E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
+E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
+E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
+E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
+E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
+E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
+E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
+E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
+E ((foo)|bar)!bas bar!bas (0,7)(0,3)
+E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
+E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
+E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
+E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
+E (foo|(bar))!bas foo!bas (0,7)(0,3)
+E (foo|bar)!bas bar!bas (0,7)(0,3)
+E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
+E (foo|bar)!bas foo!bas (0,7)(0,3)
+E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
+E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
+E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
+E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
+E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
+E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
+E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
+E .*(/XXX).* /XXX (0,4)(0,4)
+E .*(\\XXX).* \XXX (0,4)(0,4)
+E \\XXX \XXX (0,4)
+E .*(/000).* /000 (0,4)(0,4)
+E .*(\\000).* \000 (0,4)(0,4)
+E \\000 \000 (0,4)
diff --git a/libgo/go/regexp/testdata/nullsubexpr.dat b/libgo/go/regexp/testdata/nullsubexpr.dat
new file mode 100644
index 00000000000..2e18fbb9170
--- /dev/null
+++ b/libgo/go/regexp/testdata/nullsubexpr.dat
@@ -0,0 +1,79 @@
+NOTE null subexpression matches : 2002-06-06
+
+E (a*)* a (0,1)(0,1)
+#E SAME x (0,0)(0,0)
+E SAME x (0,0)(?,?) RE2/Go
+E SAME aaaaaa (0,6)(0,6)
+E SAME aaaaaax (0,6)(0,6)
+E (a*)+ a (0,1)(0,1)
+E SAME x (0,0)(0,0)
+E SAME aaaaaa (0,6)(0,6)
+E SAME aaaaaax (0,6)(0,6)
+E (a+)* a (0,1)(0,1)
+E SAME x (0,0)
+E SAME aaaaaa (0,6)(0,6)
+E SAME aaaaaax (0,6)(0,6)
+E (a+)+ a (0,1)(0,1)
+E SAME x NOMATCH
+E SAME aaaaaa (0,6)(0,6)
+E SAME aaaaaax (0,6)(0,6)
+
+E ([a]*)* a (0,1)(0,1)
+#E SAME x (0,0)(0,0)
+E SAME x (0,0)(?,?) RE2/Go
+E SAME aaaaaa (0,6)(0,6)
+E SAME aaaaaax (0,6)(0,6)
+E ([a]*)+ a (0,1)(0,1)
+E SAME x (0,0)(0,0)
+E SAME aaaaaa (0,6)(0,6)
+E SAME aaaaaax (0,6)(0,6)
+E ([^b]*)* a (0,1)(0,1)
+#E SAME b (0,0)(0,0)
+E SAME b (0,0)(?,?) RE2/Go
+E SAME aaaaaa (0,6)(0,6)
+E SAME aaaaaab (0,6)(0,6)
+E ([ab]*)* a (0,1)(0,1)
+E SAME aaaaaa (0,6)(0,6)
+E SAME ababab (0,6)(0,6)
+E SAME bababa (0,6)(0,6)
+E SAME b (0,1)(0,1)
+E SAME bbbbbb (0,6)(0,6)
+E SAME aaaabcde (0,5)(0,5)
+E ([^a]*)* b (0,1)(0,1)
+E SAME bbbbbb (0,6)(0,6)
+#E SAME aaaaaa (0,0)(0,0)
+E SAME aaaaaa (0,0)(?,?) RE2/Go
+E ([^ab]*)* ccccxx (0,6)(0,6)
+#E SAME ababab (0,0)(0,0)
+E SAME ababab (0,0)(?,?) RE2/Go
+
+E ((z)+|a)* zabcde (0,2)(1,2)
+
+#{E a+? aaaaaa (0,1) no *? +? mimimal match ops
+#E (a) aaa (0,1)(0,1)
+#E (a*?) aaa (0,0)(0,0)
+#E (a)*? aaa (0,0)
+#E (a*?)*? aaa (0,0)
+#}
+
+B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
+B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
+B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
+B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
+B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
+B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
+B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
+B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
+
+#E (a*)*(x) x (0,1)(0,0)(0,1)
+E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
+E (a*)*(x) ax (0,2)(0,1)(1,2)
+E (a*)*(x) axa (0,2)(0,1)(1,2)
+
+E (a*)+(x) x (0,1)(0,0)(0,1)
+E (a*)+(x) ax (0,2)(0,1)(1,2)
+E (a*)+(x) axa (0,2)(0,1)(1,2)
+
+E (a*){2}(x) x (0,1)(0,0)(0,1)
+E (a*){2}(x) ax (0,2)(1,1)(1,2)
+E (a*){2}(x) axa (0,2)(1,1)(1,2)
diff --git a/libgo/go/regexp/testdata/re2-search.txt b/libgo/go/regexp/testdata/re2-search.txt
new file mode 100644
index 00000000000..f648e5527f7
--- /dev/null
+++ b/libgo/go/regexp/testdata/re2-search.txt
@@ -0,0 +1,3667 @@
+# RE2 basic search tests built by make log
+# Thu Sep 8 13:43:43 EDT 2011
+Regexp.SearchTests
+strings
+""
+"a"
+regexps
+"a"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:a)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:a)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:a)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"zyzzyva"
+regexps
+"a"
+-;-;-;-
+-;6-7;-;6-7
+"^(?:a)$"
+-;-;-;-
+-;-;-;-
+"^(?:a)"
+-;-;-;-
+-;-;-;-
+"(?:a)$"
+-;-;-;-
+-;6-7;-;6-7
+strings
+""
+"aa"
+regexps
+"a+"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"^(?:a+)$"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"^(?:a+)"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"(?:a+)$"
+-;-;-;-
+0-2;0-2;0-2;0-2
+strings
+""
+"ab"
+regexps
+"(a+|b)+"
+-;-;-;-
+0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2
+"^(?:(a+|b)+)$"
+-;-;-;-
+0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2
+"^(?:(a+|b)+)"
+-;-;-;-
+0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2
+"(?:(a+|b)+)$"
+-;-;-;-
+0-2 1-2;0-2 1-2;0-2 1-2;0-2 1-2
+strings
+""
+"xabcdx"
+regexps
+"ab|cd"
+-;-;-;-
+-;1-3;-;1-3
+"^(?:ab|cd)$"
+-;-;-;-
+-;-;-;-
+"^(?:ab|cd)"
+-;-;-;-
+-;-;-;-
+"(?:ab|cd)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"hello\ngoodbye\n"
+regexps
+"h.*od?"
+-;-;-;-
+-;0-5;-;0-5
+"^(?:h.*od?)$"
+-;-;-;-
+-;-;-;-
+"^(?:h.*od?)"
+-;-;-;-
+-;0-5;-;0-5
+"(?:h.*od?)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"hello\ngoodbye\n"
+regexps
+"h.*o"
+-;-;-;-
+-;0-5;-;0-5
+"^(?:h.*o)$"
+-;-;-;-
+-;-;-;-
+"^(?:h.*o)"
+-;-;-;-
+-;0-5;-;0-5
+"(?:h.*o)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"goodbye\nhello\n"
+regexps
+"h.*o"
+-;-;-;-
+-;8-13;-;8-13
+"^(?:h.*o)$"
+-;-;-;-
+-;-;-;-
+"^(?:h.*o)"
+-;-;-;-
+-;-;-;-
+"(?:h.*o)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"hello world"
+regexps
+"h.*o"
+-;-;-;-
+-;0-8;-;0-8
+"^(?:h.*o)$"
+-;-;-;-
+-;-;-;-
+"^(?:h.*o)"
+-;-;-;-
+-;0-8;-;0-8
+"(?:h.*o)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"othello, world"
+regexps
+"h.*o"
+-;-;-;-
+-;2-11;-;2-11
+"^(?:h.*o)$"
+-;-;-;-
+-;-;-;-
+"^(?:h.*o)"
+-;-;-;-
+-;-;-;-
+"(?:h.*o)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aaaaaaa"
+regexps
+"[^\\s\\S]"
+-;-;-;-
+-;-;-;-
+"^(?:[^\\s\\S])$"
+-;-;-;-
+-;-;-;-
+"^(?:[^\\s\\S])"
+-;-;-;-
+-;-;-;-
+"(?:[^\\s\\S])$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aaaaaaa"
+regexps
+"a"
+-;-;-;-
+-;0-1;-;0-1
+"^(?:a)$"
+-;-;-;-
+-;-;-;-
+"^(?:a)"
+-;-;-;-
+-;0-1;-;0-1
+"(?:a)$"
+-;-;-;-
+-;6-7;-;6-7
+strings
+""
+"aaaaaaa"
+regexps
+"a*"
+0-0;0-0;0-0;0-0
+0-7;0-7;0-7;0-7
+"^(?:a*)$"
+0-0;0-0;0-0;0-0
+0-7;0-7;0-7;0-7
+"^(?:a*)"
+0-0;0-0;0-0;0-0
+0-7;0-7;0-7;0-7
+"(?:a*)$"
+0-0;0-0;0-0;0-0
+0-7;0-7;0-7;0-7
+strings
+""
+""
+regexps
+"a*"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:a*)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:a*)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:a*)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+""
+regexps
+"a*"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:a*)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:a*)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:a*)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"xabcdx"
+regexps
+"ab|cd"
+-;-;-;-
+-;1-3;-;1-3
+"^(?:ab|cd)$"
+-;-;-;-
+-;-;-;-
+"^(?:ab|cd)"
+-;-;-;-
+-;-;-;-
+"(?:ab|cd)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"cab"
+regexps
+"a"
+-;-;-;-
+-;1-2;-;1-2
+"^(?:a)$"
+-;-;-;-
+-;-;-;-
+"^(?:a)"
+-;-;-;-
+-;-;-;-
+"(?:a)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"cab"
+regexps
+"a*b"
+-;-;-;-
+-;1-3;-;1-3
+"^(?:a*b)$"
+-;-;-;-
+-;-;-;-
+"^(?:a*b)"
+-;-;-;-
+-;-;-;-
+"(?:a*b)$"
+-;-;-;-
+-;1-3;-;1-3
+strings
+""
+"x"
+regexps
+"((((((((((((((((((((x))))))))))))))))))))"
+-;-;-;-
+0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1
+"^(?:((((((((((((((((((((x)))))))))))))))))))))$"
+-;-;-;-
+0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1
+"^(?:((((((((((((((((((((x)))))))))))))))))))))"
+-;-;-;-
+0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1
+"(?:((((((((((((((((((((x)))))))))))))))))))))$"
+-;-;-;-
+0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1;0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1 0-1
+strings
+""
+"xxxabcdxxx"
+regexps
+"[abcd]"
+-;-;-;-
+-;3-4;-;3-4
+"^(?:[abcd])$"
+-;-;-;-
+-;-;-;-
+"^(?:[abcd])"
+-;-;-;-
+-;-;-;-
+"(?:[abcd])$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xxxabcdxxx"
+regexps
+"[^x]"
+-;-;-;-
+-;3-4;-;3-4
+"^(?:[^x])$"
+-;-;-;-
+-;-;-;-
+"^(?:[^x])"
+-;-;-;-
+-;-;-;-
+"(?:[^x])$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xxxabcdxxx"
+regexps
+"[abcd]+"
+-;-;-;-
+-;3-7;-;3-7
+"^(?:[abcd]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:[abcd]+)"
+-;-;-;-
+-;-;-;-
+"(?:[abcd]+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xxxabcdxxx"
+regexps
+"[^x]+"
+-;-;-;-
+-;3-7;-;3-7
+"^(?:[^x]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:[^x]+)"
+-;-;-;-
+-;-;-;-
+"(?:[^x]+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"fo"
+regexps
+"(fo|foo)"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:(fo|foo))$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:(fo|foo))"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"(?:(fo|foo))$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+strings
+""
+"foo"
+regexps
+"(foo|fo)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:(foo|fo))$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:(foo|fo))"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:(foo|fo))$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"aA"
+regexps
+"aa"
+-;-;-;-
+-;-;-;-
+"^(?:aa)$"
+-;-;-;-
+-;-;-;-
+"^(?:aa)"
+-;-;-;-
+-;-;-;-
+"(?:aa)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"Aa"
+regexps
+"a"
+-;-;-;-
+-;1-2;-;1-2
+"^(?:a)$"
+-;-;-;-
+-;-;-;-
+"^(?:a)"
+-;-;-;-
+-;-;-;-
+"(?:a)$"
+-;-;-;-
+-;1-2;-;1-2
+strings
+""
+"A"
+regexps
+"a"
+-;-;-;-
+-;-;-;-
+"^(?:a)$"
+-;-;-;-
+-;-;-;-
+"^(?:a)"
+-;-;-;-
+-;-;-;-
+"(?:a)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abc"
+regexps
+"ABC"
+-;-;-;-
+-;-;-;-
+"^(?:ABC)$"
+-;-;-;-
+-;-;-;-
+"^(?:ABC)"
+-;-;-;-
+-;-;-;-
+"(?:ABC)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"XABCY"
+regexps
+"abc"
+-;-;-;-
+-;-;-;-
+"^(?:abc)$"
+-;-;-;-
+-;-;-;-
+"^(?:abc)"
+-;-;-;-
+-;-;-;-
+"(?:abc)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xabcy"
+regexps
+"ABC"
+-;-;-;-
+-;-;-;-
+"^(?:ABC)$"
+-;-;-;-
+-;-;-;-
+"^(?:ABC)"
+-;-;-;-
+-;-;-;-
+"(?:ABC)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo"
+regexps
+"foo|bar|[A-Z]"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"^(?:foo|bar|[A-Z])$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"^(?:foo|bar|[A-Z])"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"(?:foo|bar|[A-Z])$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+strings
+""
+"foo"
+regexps
+"^(foo|bar|[A-Z])"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^(foo|bar|[A-Z]))$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^(foo|bar|[A-Z]))"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:^(foo|bar|[A-Z]))$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"foo\n"
+regexps
+"(foo|bar|[A-Z])$"
+-;-;-;-
+-;-;-;-
+"^(?:(foo|bar|[A-Z])$)$"
+-;-;-;-
+-;-;-;-
+"^(?:(foo|bar|[A-Z])$)"
+-;-;-;-
+-;-;-;-
+"(?:(foo|bar|[A-Z])$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo"
+regexps
+"(foo|bar|[A-Z])$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:(foo|bar|[A-Z])$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:(foo|bar|[A-Z])$)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:(foo|bar|[A-Z])$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"foo\n"
+regexps
+"^(foo|bar|[A-Z])$"
+-;-;-;-
+-;-;-;-
+"^(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^(foo|bar|[A-Z])$)"
+-;-;-;-
+-;-;-;-
+"(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo"
+regexps
+"^(foo|bar|[A-Z])$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^(foo|bar|[A-Z])$)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"bar"
+regexps
+"^(foo|bar|[A-Z])$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^(foo|bar|[A-Z])$)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"X"
+regexps
+"^(foo|bar|[A-Z])$"
+-;-;-;-
+0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
+"^(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
+"^(?:^(foo|bar|[A-Z])$)"
+-;-;-;-
+0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
+"(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
+strings
+""
+"XY"
+regexps
+"^(foo|bar|[A-Z])$"
+-;-;-;-
+-;-;-;-
+"^(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^(foo|bar|[A-Z])$)"
+-;-;-;-
+-;-;-;-
+"(?:^(foo|bar|[A-Z])$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"fo"
+regexps
+"^(fo|foo)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:^(fo|foo)$)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:^(fo|foo)$)"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"(?:^(fo|foo)$)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+strings
+""
+"foo"
+regexps
+"^(fo|foo)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^(fo|foo)$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^(fo|foo)$)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:^(fo|foo)$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"fo"
+regexps
+"^^(fo|foo)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:^^(fo|foo)$)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:^^(fo|foo)$)"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"(?:^^(fo|foo)$)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+strings
+""
+"foo"
+regexps
+"^^(fo|foo)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^^(fo|foo)$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^^(fo|foo)$)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:^^(fo|foo)$)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+""
+regexps
+"^$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+""
+regexps
+"^^$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^^$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^^$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^^$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+""
+regexps
+"^$$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^$$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^$$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^$$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^^$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^^$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^$$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^$$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+""
+regexps
+"^^$$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^^$$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^^$$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^^$$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^^$$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^^$$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^^$$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^^$$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+""
+regexps
+"^^^^^^^^$$$$$$$$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^^^^^^^^$$$$$$$$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^^^^^^^^$$$$$$$$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^^^^^^^^$$$$$$$$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^"
+0-0;0-0;0-0;0-0
+-;0-0;-;0-0
+"^(?:^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^)"
+0-0;0-0;0-0;0-0
+-;0-0;-;0-0
+"(?:^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x"
+regexps
+"$"
+0-0;0-0;0-0;0-0
+-;1-1;-;1-1
+"^(?:$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:$)$"
+0-0;0-0;0-0;0-0
+-;1-1;-;1-1
+strings
+""
+"nofoo foo that"
+regexps
+"\\bfoo\\b"
+-;-;-;-
+-;6-9;-;6-9
+"^(?:\\bfoo\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bfoo\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bfoo\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"faoa x"
+regexps
+"a\\b"
+-;-;-;-
+-;3-4;-;3-4
+"^(?:a\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:a\\b)"
+-;-;-;-
+-;-;-;-
+"(?:a\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"bar x"
+regexps
+"\\bbar"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:\\bbar)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bbar)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:\\bbar)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo\nbar x"
+regexps
+"\\bbar"
+-;-;-;-
+-;4-7;-;4-7
+"^(?:\\bbar)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bbar)"
+-;-;-;-
+-;-;-;-
+"(?:\\bbar)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foobar"
+regexps
+"bar\\b"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:bar\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:bar\\b)"
+-;-;-;-
+-;-;-;-
+"(?:bar\\b)$"
+-;-;-;-
+-;3-6;-;3-6
+strings
+""
+"foobar\nxxx"
+regexps
+"bar\\b"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:bar\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:bar\\b)"
+-;-;-;-
+-;-;-;-
+"(?:bar\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo"
+regexps
+"(foo|bar|[A-Z])\\b"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:(foo|bar|[A-Z])\\b)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"foo\n"
+regexps
+"(foo|bar|[A-Z])\\b"
+-;-;-;-
+-;0-3 0-3;-;0-3 0-3
+"^(?:(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:(foo|bar|[A-Z])\\b)"
+-;-;-;-
+-;0-3 0-3;-;0-3 0-3
+"(?:(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+""
+regexps
+"\\b"
+-;-;-;-
+-;-;-;-
+"^(?:\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"\\b"
+-;-;-;-
+-;0-0;-;0-0
+"^(?:\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b)"
+-;-;-;-
+-;0-0;-;0-0
+"(?:\\b)$"
+-;-;-;-
+-;1-1;-;1-1
+strings
+""
+"foo"
+regexps
+"\\b(foo|bar|[A-Z])"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:\\b(foo|bar|[A-Z]))$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:\\b(foo|bar|[A-Z]))"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:\\b(foo|bar|[A-Z]))$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"X"
+regexps
+"\\b(foo|bar|[A-Z])\\b"
+-;-;-;-
+0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
+"^(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
+"^(?:\\b(foo|bar|[A-Z])\\b)"
+-;-;-;-
+0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
+"(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+0-1 0-1;0-1 0-1;0-1 0-1;0-1 0-1
+strings
+""
+"XY"
+regexps
+"\\b(foo|bar|[A-Z])\\b"
+-;-;-;-
+-;-;-;-
+"^(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b(foo|bar|[A-Z])\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"bar"
+regexps
+"\\b(foo|bar|[A-Z])\\b"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:\\b(foo|bar|[A-Z])\\b)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"foo"
+regexps
+"\\b(foo|bar|[A-Z])\\b"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:\\b(foo|bar|[A-Z])\\b)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"foo\n"
+regexps
+"\\b(foo|bar|[A-Z])\\b"
+-;-;-;-
+-;0-3 0-3;-;0-3 0-3
+"^(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b(foo|bar|[A-Z])\\b)"
+-;-;-;-
+-;0-3 0-3;-;0-3 0-3
+"(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"ffoo bbar N x"
+regexps
+"\\b(foo|bar|[A-Z])\\b"
+-;-;-;-
+-;10-11 10-11;-;10-11 10-11
+"^(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b(foo|bar|[A-Z])\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\b(foo|bar|[A-Z])\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"fo"
+regexps
+"\\b(fo|foo)\\b"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:\\b(fo|foo)\\b)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:\\b(fo|foo)\\b)"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"(?:\\b(fo|foo)\\b)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+strings
+""
+"foo"
+regexps
+"\\b(fo|foo)\\b"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:\\b(fo|foo)\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:\\b(fo|foo)\\b)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:\\b(fo|foo)\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+""
+regexps
+"\\b\\b"
+-;-;-;-
+-;-;-;-
+"^(?:\\b\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\b\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"\\b\\b"
+-;-;-;-
+-;0-0;-;0-0
+"^(?:\\b\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b\\b)"
+-;-;-;-
+-;0-0;-;0-0
+"(?:\\b\\b)$"
+-;-;-;-
+-;1-1;-;1-1
+strings
+""
+""
+regexps
+"\\b$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b$)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b$)"
+-;-;-;-
+-;-;-;-
+"(?:\\b$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"\\b$"
+-;-;-;-
+-;1-1;-;1-1
+"^(?:\\b$)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b$)"
+-;-;-;-
+-;-;-;-
+"(?:\\b$)$"
+-;-;-;-
+-;1-1;-;1-1
+strings
+""
+"y x"
+regexps
+"\\b$"
+-;-;-;-
+-;3-3;-;3-3
+"^(?:\\b$)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\b$)"
+-;-;-;-
+-;-;-;-
+"(?:\\b$)$"
+-;-;-;-
+-;3-3;-;3-3
+strings
+""
+"x"
+regexps
+"\\b.$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\b.$)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\b.$)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:\\b.$)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"fo"
+regexps
+"^\\b(fo|foo)\\b"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:^\\b(fo|foo)\\b)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"^(?:^\\b(fo|foo)\\b)"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+"(?:^\\b(fo|foo)\\b)$"
+-;-;-;-
+0-2 0-2;0-2 0-2;0-2 0-2;0-2 0-2
+strings
+""
+"foo"
+regexps
+"^\\b(fo|foo)\\b"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^\\b(fo|foo)\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:^\\b(fo|foo)\\b)"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"(?:^\\b(fo|foo)\\b)$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+""
+regexps
+"^\\b"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b)"
+-;-;-;-
+-;-;-;-
+"(?:^\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^\\b"
+-;-;-;-
+-;0-0;-;0-0
+"^(?:^\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b)"
+-;-;-;-
+-;0-0;-;0-0
+"(?:^\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+""
+regexps
+"^\\b\\b"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b\\b)"
+-;-;-;-
+-;-;-;-
+"(?:^\\b\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^\\b\\b"
+-;-;-;-
+-;0-0;-;0-0
+"^(?:^\\b\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b\\b)"
+-;-;-;-
+-;0-0;-;0-0
+"(?:^\\b\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+""
+regexps
+"^\\b$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b$)"
+-;-;-;-
+-;-;-;-
+"(?:^\\b$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^\\b$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\b$)"
+-;-;-;-
+-;-;-;-
+"(?:^\\b$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^\\b.$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:^\\b.$)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:^\\b.$)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:^\\b.$)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"x"
+regexps
+"^\\b.\\b$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:^\\b.\\b$)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:^\\b.\\b$)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:^\\b.\\b$)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+""
+regexps
+"^^^^^^^^\\b$$$$$$$"
+-;-;-;-
+-;-;-;-
+"^(?:^^^^^^^^\\b$$$$$$$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^^^^^^^^\\b$$$$$$$)"
+-;-;-;-
+-;-;-;-
+"(?:^^^^^^^^\\b$$$$$$$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^^^^^^^^\\b.$$$$$$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:^^^^^^^^\\b.$$$$$$)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:^^^^^^^^\\b.$$$$$$)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:^^^^^^^^\\b.$$$$$$)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"x"
+regexps
+"^^^^^^^^\\b$$$$$$$"
+-;-;-;-
+-;-;-;-
+"^(?:^^^^^^^^\\b$$$$$$$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^^^^^^^^\\b$$$$$$$)"
+-;-;-;-
+-;-;-;-
+"(?:^^^^^^^^\\b$$$$$$$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"n foo xfoox that"
+regexps
+"\\Bfoo\\B"
+-;-;-;-
+-;7-10;-;7-10
+"^(?:\\Bfoo\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\Bfoo\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\Bfoo\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"faoa x"
+regexps
+"a\\B"
+-;-;-;-
+-;1-2;-;1-2
+"^(?:a\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:a\\B)"
+-;-;-;-
+-;-;-;-
+"(?:a\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"bar x"
+regexps
+"\\Bbar"
+-;-;-;-
+-;-;-;-
+"^(?:\\Bbar)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\Bbar)"
+-;-;-;-
+-;-;-;-
+"(?:\\Bbar)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo\nbar x"
+regexps
+"\\Bbar"
+-;-;-;-
+-;-;-;-
+"^(?:\\Bbar)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\Bbar)"
+-;-;-;-
+-;-;-;-
+"(?:\\Bbar)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foobar"
+regexps
+"bar\\B"
+-;-;-;-
+-;-;-;-
+"^(?:bar\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:bar\\B)"
+-;-;-;-
+-;-;-;-
+"(?:bar\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foobar\nxxx"
+regexps
+"bar\\B"
+-;-;-;-
+-;-;-;-
+"^(?:bar\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:bar\\B)"
+-;-;-;-
+-;-;-;-
+"(?:bar\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foox"
+regexps
+"(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;0-3 0-3;-;0-3 0-3
+"^(?:(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;0-3 0-3;-;0-3 0-3
+"(?:(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo\n"
+regexps
+"(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;-;-;-
+"^(?:(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;-;-;-
+"(?:(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+""
+regexps
+"\\B"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:\\B)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:\\B)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:\\B)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"\\B"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:\\B)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:\\B)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:\\B)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"foo"
+regexps
+"\\B(foo|bar|[A-Z])"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z]))$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z]))"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|bar|[A-Z]))$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xXy"
+regexps
+"\\B(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;1-2 1-2;-;1-2 1-2
+"^(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"XY"
+regexps
+"\\B(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"XYZ"
+regexps
+"\\B(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;1-2 1-2;-;1-2 1-2
+"^(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abara"
+regexps
+"\\B(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;1-4 1-4;-;1-4 1-4
+"^(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xfoo_"
+regexps
+"\\B(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;1-4 1-4;-;1-4 1-4
+"^(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xfoo\n"
+regexps
+"\\B(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo bar vNx"
+regexps
+"\\B(foo|bar|[A-Z])\\B"
+-;-;-;-
+-;9-10 9-10;-;9-10 9-10
+"^(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|bar|[A-Z])\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|bar|[A-Z])\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xfoo"
+regexps
+"\\B(fo|foo)\\B"
+-;-;-;-
+-;1-3 1-3;-;1-3 1-3
+"^(?:\\B(fo|foo)\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(fo|foo)\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(fo|foo)\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xfooo"
+regexps
+"\\B(foo|fo)\\B"
+-;-;-;-
+-;1-4 1-4;-;1-4 1-4
+"^(?:\\B(foo|fo)\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(foo|fo)\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(foo|fo)\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+""
+regexps
+"\\B\\B"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:\\B\\B)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:\\B\\B)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:\\B\\B)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"\\B\\B"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:\\B\\B)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:\\B\\B)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:\\B\\B)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+""
+regexps
+"\\B$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:\\B$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:\\B$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:\\B$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"\\B$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:\\B$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:\\B$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:\\B$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"y x"
+regexps
+"\\B$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:\\B$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:\\B$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:\\B$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x"
+regexps
+"\\B.$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B.$)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B.$)"
+-;-;-;-
+-;-;-;-
+"(?:\\B.$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"fo"
+regexps
+"^\\B(fo|foo)\\B"
+-;-;-;-
+-;-;-;-
+"^(?:^\\B(fo|foo)\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\B(fo|foo)\\B)"
+-;-;-;-
+-;-;-;-
+"(?:^\\B(fo|foo)\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo"
+regexps
+"^\\B(fo|foo)\\B"
+-;-;-;-
+-;-;-;-
+"^(?:^\\B(fo|foo)\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\B(fo|foo)\\B)"
+-;-;-;-
+-;-;-;-
+"(?:^\\B(fo|foo)\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+""
+regexps
+"^\\B"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^\\B)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^\\B)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^\\B)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^\\B"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^\\B)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^\\B)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^\\B)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+""
+regexps
+"^\\B\\B"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^\\B\\B)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^\\B\\B)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^\\B\\B)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^\\B\\B"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^\\B\\B)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^\\B\\B)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^\\B\\B)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+""
+regexps
+"^\\B$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^\\B$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^\\B$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^\\B$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^\\B$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^\\B$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^\\B$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^\\B$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^\\B.$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\B.$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\B.$)"
+-;-;-;-
+-;-;-;-
+"(?:^\\B.$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^\\B.\\B$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\B.\\B$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\B.\\B$)"
+-;-;-;-
+-;-;-;-
+"(?:^\\B.\\B$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+""
+regexps
+"^^^^^^^^\\B$$$$$$$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^^^^^^^^\\B$$$$$$$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^^^^^^^^\\B$$$$$$$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^^^^^^^^\\B$$$$$$$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^^^^^^^^\\B.$$$$$$"
+-;-;-;-
+-;-;-;-
+"^(?:^^^^^^^^\\B.$$$$$$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^^^^^^^^\\B.$$$$$$)"
+-;-;-;-
+-;-;-;-
+"(?:^^^^^^^^\\B.$$$$$$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^^^^^^^^\\B$$$$$$$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^^^^^^^^\\B$$$$$$$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^^^^^^^^\\B$$$$$$$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^^^^^^^^\\B$$$$$$$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x"
+regexps
+"\\bx\\b"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\bx\\b)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\bx\\b)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:\\bx\\b)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"x>"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;0-1;-;0-1
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;0-1;-;0-1
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"<x"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;1-2;-;1-2
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;1-2;-;1-2
+strings
+""
+"<x>"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;1-2;-;1-2
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"ax"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xb"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"axb"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"«x"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;2-3;-;2-3
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;2-3;-;2-3
+strings
+""
+"x»"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;0-1;-;0-1
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;0-1;-;0-1
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"«x»"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;2-3;-;2-3
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"axb"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"áxβ"
+regexps
+"\\bx\\b"
+-;-;-;-
+-;2-3;-;2-3
+"^(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\bx\\b)"
+-;-;-;-
+-;-;-;-
+"(?:\\bx\\b)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"axb"
+regexps
+"\\Bx\\B"
+-;-;-;-
+-;1-2;-;1-2
+"^(?:\\Bx\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\Bx\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\Bx\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"áxβ"
+regexps
+"\\Bx\\B"
+-;-;-;-
+-;-;-;-
+"^(?:\\Bx\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\Bx\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\Bx\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+""
+regexps
+"^$^$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^$^$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^$^$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^$^$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+""
+regexps
+"^$^"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^$^)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:^$^)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:^$^)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+""
+regexps
+"$^$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:$^$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"^(?:$^$)"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+"(?:$^$)$"
+0-0;0-0;0-0;0-0
+0-0;0-0;0-0;0-0
+strings
+""
+"x"
+regexps
+"^$^$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x"
+regexps
+"^$^"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^$^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x"
+regexps
+"$^$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:$^$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x\ny"
+regexps
+"^$^$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x\ny"
+regexps
+"^$^"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^$^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x\ny"
+regexps
+"$^$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:$^$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x\n\ny"
+regexps
+"^$^$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x\n\ny"
+regexps
+"^$^"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^$^)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:^$^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"x\n\ny"
+regexps
+"$^$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:$^$)"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"(?:$^$)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"foo$bar"
+regexps
+"^(foo\\$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^(foo\\$)$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^(foo\\$)$)"
+-;-;-;-
+-;-;-;-
+"(?:^(foo\\$)$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo$bar"
+regexps
+"(foo\\$)"
+-;-;-;-
+-;0-4 0-4;-;0-4 0-4
+"^(?:(foo\\$))$"
+-;-;-;-
+-;-;-;-
+"^(?:(foo\\$))"
+-;-;-;-
+-;0-4 0-4;-;0-4 0-4
+"(?:(foo\\$))$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abc"
+regexps
+"^...$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"^(?:^...$)$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"^(?:^...$)"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"(?:^...$)$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+strings
+""
+"本"
+regexps
+"^本$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"^(?:^本$)$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"^(?:^本$)"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"(?:^本$)$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+strings
+""
+"日本語"
+regexps
+"^...$"
+-;-;-;-
+0-9;0-9;0-9;0-9
+"^(?:^...$)$"
+-;-;-;-
+0-9;0-9;0-9;0-9
+"^(?:^...$)"
+-;-;-;-
+0-9;0-9;0-9;0-9
+"(?:^...$)$"
+-;-;-;-
+0-9;0-9;0-9;0-9
+strings
+""
+".本."
+regexps
+"^...$"
+-;-;-;-
+0-5;0-5;0-5;0-5
+"^(?:^...$)$"
+-;-;-;-
+0-5;0-5;0-5;0-5
+"^(?:^...$)"
+-;-;-;-
+0-5;0-5;0-5;0-5
+"(?:^...$)$"
+-;-;-;-
+0-5;0-5;0-5;0-5
+strings
+""
+"本"
+regexps
+"^\\C\\C\\C$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"^(?:^\\C\\C\\C$)$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"^(?:^\\C\\C\\C$)"
+-;-;-;-
+0-3;0-3;0-3;0-3
+"(?:^\\C\\C\\C$)$"
+-;-;-;-
+0-3;0-3;0-3;0-3
+strings
+""
+"本"
+regexps
+"^\\C$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\C$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\C$)"
+-;-;-;-
+-;-;-;-
+"(?:^\\C$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"日本語"
+regexps
+"^\\C\\C\\C$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\C\\C\\C$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^\\C\\C\\C$)"
+-;-;-;-
+-;-;-;-
+"(?:^\\C\\C\\C$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"日本語"
+regexps
+"^...$"
+-;-;-;-
+0-9;0-9;0-9;0-9
+"^(?:^...$)$"
+-;-;-;-
+0-9;0-9;0-9;0-9
+"^(?:^...$)"
+-;-;-;-
+0-9;0-9;0-9;0-9
+"(?:^...$)$"
+-;-;-;-
+0-9;0-9;0-9;0-9
+strings
+""
+"日本語"
+regexps
+"^.........$"
+-;-;-;-
+-;-;-;-
+"^(?:^.........$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^.........$)"
+-;-;-;-
+-;-;-;-
+"(?:^.........$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+".本."
+regexps
+"^...$"
+-;-;-;-
+0-5;0-5;0-5;0-5
+"^(?:^...$)$"
+-;-;-;-
+0-5;0-5;0-5;0-5
+"^(?:^...$)"
+-;-;-;-
+0-5;0-5;0-5;0-5
+"(?:^...$)$"
+-;-;-;-
+0-5;0-5;0-5;0-5
+strings
+""
+".本."
+regexps
+"^.....$"
+-;-;-;-
+-;-;-;-
+"^(?:^.....$)$"
+-;-;-;-
+-;-;-;-
+"^(?:^.....$)"
+-;-;-;-
+-;-;-;-
+"(?:^.....$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"xfooo"
+regexps
+"\\B(fo|foo)\\B"
+-;-;-;-
+-;1-3 1-3;-;1-4 1-4
+"^(?:\\B(fo|foo)\\B)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\B(fo|foo)\\B)"
+-;-;-;-
+-;-;-;-
+"(?:\\B(fo|foo)\\B)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"foo"
+regexps
+"(fo|foo)"
+-;-;-;-
+0-3 0-3;0-2 0-2;0-3 0-3;0-3 0-3
+"^(?:(fo|foo))$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+"^(?:(fo|foo))"
+-;-;-;-
+0-3 0-3;0-2 0-2;0-3 0-3;0-3 0-3
+"(?:(fo|foo))$"
+-;-;-;-
+0-3 0-3;0-3 0-3;0-3 0-3;0-3 0-3
+strings
+""
+"a"
+regexps
+"\\141"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\141)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\141)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:\\141)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"0"
+regexps
+"\\060"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\060)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\060)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:\\060)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"00"
+regexps
+"\\0600"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"^(?:\\0600)$"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"^(?:\\0600)"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"(?:\\0600)$"
+-;-;-;-
+0-2;0-2;0-2;0-2
+strings
+""
+"08"
+regexps
+"\\608"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"^(?:\\608)$"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"^(?:\\608)"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"(?:\\608)$"
+-;-;-;-
+0-2;0-2;0-2;0-2
+strings
+""
+""
+regexps
+"\\01"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\01)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\01)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:\\01)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"8"
+regexps
+"\\018"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"^(?:\\018)$"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"^(?:\\018)"
+-;-;-;-
+0-2;0-2;0-2;0-2
+"(?:\\018)$"
+-;-;-;-
+0-2;0-2;0-2;0-2
+strings
+""
+"a"
+regexps
+"\\x{61}"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\x{61})$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\x{61})"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:\\x{61})$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"a"
+regexps
+"\\x61"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\x61)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\x61)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:\\x61)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"a"
+regexps
+"\\x{00000061}"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\x{00000061})$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:\\x{00000061})"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:\\x{00000061})$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"aαβb"
+regexps
+"\\p{Greek}+"
+-;-;-;-
+-;1-5;-;1-5
+"^(?:\\p{Greek}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\p{Greek}+)"
+-;-;-;-
+-;-;-;-
+"(?:\\p{Greek}+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aαβb"
+regexps
+"\\P{Greek}+"
+-;-;-;-
+-;0-1;-;0-1
+"^(?:\\P{Greek}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\P{Greek}+)"
+-;-;-;-
+-;0-1;-;0-1
+"(?:\\P{Greek}+)$"
+-;-;-;-
+-;5-6;-;5-6
+strings
+""
+"aαβb"
+regexps
+"\\p{^Greek}+"
+-;-;-;-
+-;0-1;-;0-1
+"^(?:\\p{^Greek}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\p{^Greek}+)"
+-;-;-;-
+-;0-1;-;0-1
+"(?:\\p{^Greek}+)$"
+-;-;-;-
+-;5-6;-;5-6
+strings
+""
+"aαβb"
+regexps
+"\\P{^Greek}+"
+-;-;-;-
+-;1-5;-;1-5
+"^(?:\\P{^Greek}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\P{^Greek}+)"
+-;-;-;-
+-;-;-;-
+"(?:\\P{^Greek}+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abc123"
+regexps
+"[^0-9]+"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:[^0-9]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:[^0-9]+)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:[^0-9]+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abc123²³¼½¾₀₉"
+regexps
+"\\p{Nd}+"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:\\p{Nd}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\p{Nd}+)"
+-;-;-;-
+-;-;-;-
+"(?:\\p{Nd}+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abc123²³¼½¾₀₉"
+regexps
+"\\p{^Nd}+"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:\\p{^Nd}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\p{^Nd}+)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:\\p{^Nd}+)$"
+-;-;-;-
+-;6-22;-;6-22
+strings
+""
+"abc123²³¼½¾₀₉"
+regexps
+"\\P{Nd}+"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:\\P{Nd}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\P{Nd}+)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:\\P{Nd}+)$"
+-;-;-;-
+-;6-22;-;6-22
+strings
+""
+"abc123²³¼½¾₀₉"
+regexps
+"\\P{^Nd}+"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:\\P{^Nd}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\P{^Nd}+)"
+-;-;-;-
+-;-;-;-
+"(?:\\P{^Nd}+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abc123²³¼½¾₀₉"
+regexps
+"\\pN+"
+-;-;-;-
+-;3-22;-;3-22
+"^(?:\\pN+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\pN+)"
+-;-;-;-
+-;-;-;-
+"(?:\\pN+)$"
+-;-;-;-
+-;3-22;-;3-22
+strings
+""
+"abc123²³¼½¾₀₉"
+regexps
+"\\p{N}+"
+-;-;-;-
+-;3-22;-;3-22
+"^(?:\\p{N}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\p{N}+)"
+-;-;-;-
+-;-;-;-
+"(?:\\p{N}+)$"
+-;-;-;-
+-;3-22;-;3-22
+strings
+""
+"abc123²³¼½¾₀₉"
+regexps
+"\\p{^N}+"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:\\p{^N}+)$"
+-;-;-;-
+-;-;-;-
+"^(?:\\p{^N}+)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:\\p{^N}+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abc123"
+regexps
+"\\p{Any}+"
+-;-;-;-
+0-6;0-6;0-6;0-6
+"^(?:\\p{Any}+)$"
+-;-;-;-
+0-6;0-6;0-6;0-6
+"^(?:\\p{Any}+)"
+-;-;-;-
+0-6;0-6;0-6;0-6
+"(?:\\p{Any}+)$"
+-;-;-;-
+0-6;0-6;0-6;0-6
+strings
+""
+"@AaB"
+regexps
+"(?i)[@-A]+"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:(?i)[@-A]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?i)[@-A]+)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:(?i)[@-A]+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aAzZ"
+regexps
+"(?i)[A-Z]+"
+-;-;-;-
+0-4;0-4;0-4;0-4
+"^(?:(?i)[A-Z]+)$"
+-;-;-;-
+0-4;0-4;0-4;0-4
+"^(?:(?i)[A-Z]+)"
+-;-;-;-
+0-4;0-4;0-4;0-4
+"(?:(?i)[A-Z]+)$"
+-;-;-;-
+0-4;0-4;0-4;0-4
+strings
+""
+"Aa\\"
+regexps
+"(?i)[^\\\\]+"
+-;-;-;-
+-;0-2;-;0-2
+"^(?:(?i)[^\\\\]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?i)[^\\\\]+)"
+-;-;-;-
+-;0-2;-;0-2
+"(?:(?i)[^\\\\]+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"acegikmoqsuwyACEGIKMOQSUWY"
+regexps
+"(?i)[acegikmoqsuwy]+"
+-;-;-;-
+0-26;0-26;0-26;0-26
+"^(?:(?i)[acegikmoqsuwy]+)$"
+-;-;-;-
+0-26;0-26;0-26;0-26
+"^(?:(?i)[acegikmoqsuwy]+)"
+-;-;-;-
+0-26;0-26;0-26;0-26
+"(?:(?i)[acegikmoqsuwy]+)$"
+-;-;-;-
+0-26;0-26;0-26;0-26
+strings
+""
+"@AaB"
+regexps
+"[@-A]+"
+-;-;-;-
+-;0-2;-;0-2
+"^(?:[@-A]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:[@-A]+)"
+-;-;-;-
+-;0-2;-;0-2
+"(?:[@-A]+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aAzZ"
+regexps
+"[A-Z]+"
+-;-;-;-
+-;1-2;-;1-2
+"^(?:[A-Z]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:[A-Z]+)"
+-;-;-;-
+-;-;-;-
+"(?:[A-Z]+)$"
+-;-;-;-
+-;3-4;-;3-4
+strings
+""
+"Aa\\"
+regexps
+"[^\\\\]+"
+-;-;-;-
+-;0-2;-;0-2
+"^(?:[^\\\\]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:[^\\\\]+)"
+-;-;-;-
+-;0-2;-;0-2
+"(?:[^\\\\]+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"acegikmoqsuwyACEGIKMOQSUWY"
+regexps
+"[acegikmoqsuwy]+"
+-;-;-;-
+-;0-13;-;0-13
+"^(?:[acegikmoqsuwy]+)$"
+-;-;-;-
+-;-;-;-
+"^(?:[acegikmoqsuwy]+)"
+-;-;-;-
+-;0-13;-;0-13
+"(?:[acegikmoqsuwy]+)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"^abc"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:^abc)$"
+-;-;-;-
+-;-;-;-
+"^(?:^abc)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:^abc)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aabcdef"
+regexps
+"^abc"
+-;-;-;-
+-;-;-;-
+"^(?:^abc)$"
+-;-;-;-
+-;-;-;-
+"^(?:^abc)"
+-;-;-;-
+-;-;-;-
+"(?:^abc)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"^[ay]*[bx]+c"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:^[ay]*[bx]+c)$"
+-;-;-;-
+-;-;-;-
+"^(?:^[ay]*[bx]+c)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:^[ay]*[bx]+c)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aabcdef"
+regexps
+"^[ay]*[bx]+c"
+-;-;-;-
+-;0-4;-;0-4
+"^(?:^[ay]*[bx]+c)$"
+-;-;-;-
+-;-;-;-
+"^(?:^[ay]*[bx]+c)"
+-;-;-;-
+-;0-4;-;0-4
+"(?:^[ay]*[bx]+c)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"def$"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:def$)$"
+-;-;-;-
+-;-;-;-
+"^(?:def$)"
+-;-;-;-
+-;-;-;-
+"(?:def$)$"
+-;-;-;-
+-;3-6;-;3-6
+strings
+""
+"abcdeff"
+regexps
+"def$"
+-;-;-;-
+-;-;-;-
+"^(?:def$)$"
+-;-;-;-
+-;-;-;-
+"^(?:def$)"
+-;-;-;-
+-;-;-;-
+"(?:def$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"d[ex][fy]$"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:d[ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+"^(?:d[ex][fy]$)"
+-;-;-;-
+-;-;-;-
+"(?:d[ex][fy]$)$"
+-;-;-;-
+-;3-6;-;3-6
+strings
+""
+"abcdeff"
+regexps
+"d[ex][fy]$"
+-;-;-;-
+-;-;-;-
+"^(?:d[ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+"^(?:d[ex][fy]$)"
+-;-;-;-
+-;-;-;-
+"(?:d[ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"[dz][ex][fy]$"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:[dz][ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+"^(?:[dz][ex][fy]$)"
+-;-;-;-
+-;-;-;-
+"(?:[dz][ex][fy]$)$"
+-;-;-;-
+-;3-6;-;3-6
+strings
+""
+"abcdeff"
+regexps
+"[dz][ex][fy]$"
+-;-;-;-
+-;-;-;-
+"^(?:[dz][ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+"^(?:[dz][ex][fy]$)"
+-;-;-;-
+-;-;-;-
+"(?:[dz][ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"(?m)^abc"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:(?m)^abc)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)^abc)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:(?m)^abc)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aabcdef"
+regexps
+"(?m)^abc"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)^abc)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)^abc)"
+-;-;-;-
+-;-;-;-
+"(?:(?m)^abc)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"(?m)^[ay]*[bx]+c"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:(?m)^[ay]*[bx]+c)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)^[ay]*[bx]+c)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:(?m)^[ay]*[bx]+c)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"aabcdef"
+regexps
+"(?m)^[ay]*[bx]+c"
+-;-;-;-
+-;0-4;-;0-4
+"^(?:(?m)^[ay]*[bx]+c)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)^[ay]*[bx]+c)"
+-;-;-;-
+-;0-4;-;0-4
+"(?:(?m)^[ay]*[bx]+c)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"(?m)def$"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:(?m)def$)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)def$)"
+-;-;-;-
+-;-;-;-
+"(?:(?m)def$)$"
+-;-;-;-
+-;3-6;-;3-6
+strings
+""
+"abcdeff"
+regexps
+"(?m)def$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)def$)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)def$)"
+-;-;-;-
+-;-;-;-
+"(?:(?m)def$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"(?m)d[ex][fy]$"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:(?m)d[ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)d[ex][fy]$)"
+-;-;-;-
+-;-;-;-
+"(?:(?m)d[ex][fy]$)$"
+-;-;-;-
+-;3-6;-;3-6
+strings
+""
+"abcdeff"
+regexps
+"(?m)d[ex][fy]$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)d[ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)d[ex][fy]$)"
+-;-;-;-
+-;-;-;-
+"(?:(?m)d[ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"abcdef"
+regexps
+"(?m)[dz][ex][fy]$"
+-;-;-;-
+-;3-6;-;3-6
+"^(?:(?m)[dz][ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)[dz][ex][fy]$)"
+-;-;-;-
+-;-;-;-
+"(?:(?m)[dz][ex][fy]$)$"
+-;-;-;-
+-;3-6;-;3-6
+strings
+""
+"abcdeff"
+regexps
+"(?m)[dz][ex][fy]$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)[dz][ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+"^(?:(?m)[dz][ex][fy]$)"
+-;-;-;-
+-;-;-;-
+"(?:(?m)[dz][ex][fy]$)$"
+-;-;-;-
+-;-;-;-
+strings
+""
+"a"
+regexps
+"^"
+0-0;0-0;0-0;0-0
+-;0-0;-;0-0
+"^(?:^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^)"
+0-0;0-0;0-0;0-0
+-;0-0;-;0-0
+"(?:^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"a"
+regexps
+"^^"
+0-0;0-0;0-0;0-0
+-;0-0;-;0-0
+"^(?:^^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+"^(?:^^)"
+0-0;0-0;0-0;0-0
+-;0-0;-;0-0
+"(?:^^)$"
+0-0;0-0;0-0;0-0
+-;-;-;-
+strings
+""
+"a"
+regexps
+"a"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:a)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:a)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:a)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"a"
+regexps
+"ab*"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:ab*)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:ab*)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:ab*)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"a"
+regexps
+"a\\C*"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:a\\C*)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"^(?:a\\C*)"
+-;-;-;-
+0-1;0-1;0-1;0-1
+"(?:a\\C*)$"
+-;-;-;-
+0-1;0-1;0-1;0-1
+strings
+""
+"baba"
+regexps
+"a\\C*|ba\\C"
+-;-;-;-
+-;0-3;-;0-3
+"^(?:a\\C*|ba\\C)$"
+-;-;-;-
+-;-;-;-
+"^(?:a\\C*|ba\\C)"
+-;-;-;-
+-;0-3;-;0-3
+"(?:a\\C*|ba\\C)$"
+-;-;-;-
+-;1-4;-;1-4
diff --git a/libgo/go/regexp/testdata/repetition.dat b/libgo/go/regexp/testdata/repetition.dat
new file mode 100644
index 00000000000..e6361f51a97
--- /dev/null
+++ b/libgo/go/regexp/testdata/repetition.dat
@@ -0,0 +1,163 @@
+NOTE implicit vs. explicit repetitions : 2009-02-02
+
+# Glenn Fowler <gsf@research.att.com>
+# conforming matches (column 4) must match one of the following BREs
+# NOMATCH
+# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
+# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
+# i.e., each 3-tuple has two identical elements and one (?,?)
+
+E ((..)|(.)) NULL NOMATCH
+E ((..)|(.))((..)|(.)) NULL NOMATCH
+E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
+
+E ((..)|(.)){1} NULL NOMATCH
+E ((..)|(.)){2} NULL NOMATCH
+E ((..)|(.)){3} NULL NOMATCH
+
+E ((..)|(.))* NULL (0,0)
+
+E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
+E ((..)|(.))((..)|(.)) a NOMATCH
+E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
+
+E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
+E ((..)|(.)){2} a NOMATCH
+E ((..)|(.)){3} a NOMATCH
+
+E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
+
+E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
+E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
+
+E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
+E ((..)|(.)){3} aa NOMATCH
+
+E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
+
+E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
+E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
+
+E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
+#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
+E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
+E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
+
+#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
+E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
+
+E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
+
+E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
+#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
+E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
+
+E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
+
+E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
+
+E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
+#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
+E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
+
+#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
+E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
+
+E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
+
+E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
+E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
+E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
+
+E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
+
+NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
+
+# These test a bug in OS X / FreeBSD / NetBSD, and libtre.
+# Linux/GLIBC gets the {8,} and {8,8} wrong.
+
+:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
+:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
+:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
+:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
+:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
+:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
+:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
+:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
+:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
+#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
+:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
+#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
+:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
+#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
+:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
+#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
+:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
+#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
+:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
+#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
+:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
+#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
+:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
+#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
+:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
+:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
+
+# These test a fixed bug in my regex-tdfa that did not keep the expanded
+# form properly grouped, so right association did the wrong thing with
+# these ambiguous patterns (crafted just to test my code when I became
+# suspicious of my implementation). The first subexpression should use
+# "ab" then "a" then "bcd".
+
+# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
+# results like (0,6)(4,5)(6,6).
+
+:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
+:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
+:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
+:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
+
+# The above worked on Linux/GLIBC but the following often fail.
+# They also trip up OS X / FreeBSD / NetBSD:
+
+#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
+#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
+:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
+#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
+:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
+#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
+:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
diff --git a/libgo/go/regexp/testdata/testregex.c b/libgo/go/regexp/testdata/testregex.c
new file mode 100644
index 00000000000..37545d057f8
--- /dev/null
+++ b/libgo/go/regexp/testdata/testregex.c
@@ -0,0 +1,2286 @@
+#pragma prototyped noticed
+
+/*
+ * regex(3) test harness
+ *
+ * build: cc -o testregex testregex.c
+ * help: testregex --man
+ * note: REG_* features are detected by #ifdef; if REG_* are enums
+ * then supply #define REG_foo REG_foo for each enum REG_foo
+ *
+ * Glenn Fowler <gsf@research.att.com>
+ * AT&T Research
+ *
+ * PLEASE: publish your tests so everyone can benefit
+ *
+ * The following license covers testregex.c and all associated test data.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
+ * without restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, and/or sell copies of the
+ * Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+static const char id[] = "\n@(#)$Id: testregex (AT&T Research) 2010-06-10 $\0\n";
+
+#if _PACKAGE_ast
+#include <ast.h>
+#else
+#include <sys/types.h>
+#endif
+
+#include <stdio.h>
+#include <regex.h>
+#include <ctype.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef __STDC__
+#include <stdlib.h>
+#include <locale.h>
+#endif
+
+#ifndef RE_DUP_MAX
+#define RE_DUP_MAX 32767
+#endif
+
+#if !_PACKAGE_ast
+#undef REG_DISCIPLINE
+#endif
+
+#ifndef REG_DELIMITED
+#undef _REG_subcomp
+#endif
+
+#define TEST_ARE 0x00000001
+#define TEST_BRE 0x00000002
+#define TEST_ERE 0x00000004
+#define TEST_KRE 0x00000008
+#define TEST_LRE 0x00000010
+#define TEST_SRE 0x00000020
+
+#define TEST_EXPAND 0x00000100
+#define TEST_LENIENT 0x00000200
+
+#define TEST_QUERY 0x00000400
+#define TEST_SUB 0x00000800
+#define TEST_UNSPECIFIED 0x00001000
+#define TEST_VERIFY 0x00002000
+#define TEST_AND 0x00004000
+#define TEST_OR 0x00008000
+
+#define TEST_DELIMIT 0x00010000
+#define TEST_OK 0x00020000
+#define TEST_SAME 0x00040000
+
+#define TEST_ACTUAL 0x00100000
+#define TEST_BASELINE 0x00200000
+#define TEST_FAIL 0x00400000
+#define TEST_PASS 0x00800000
+#define TEST_SUMMARY 0x01000000
+
+#define TEST_IGNORE_ERROR 0x02000000
+#define TEST_IGNORE_OVER 0x04000000
+#define TEST_IGNORE_POSITION 0x08000000
+
+#define TEST_CATCH 0x10000000
+#define TEST_VERBOSE 0x20000000
+
+#define TEST_DECOMP 0x40000000
+
+#define TEST_GLOBAL (TEST_ACTUAL|TEST_AND|TEST_BASELINE|TEST_CATCH|TEST_FAIL|TEST_IGNORE_ERROR|TEST_IGNORE_OVER|TEST_IGNORE_POSITION|TEST_OR|TEST_PASS|TEST_SUMMARY|TEST_VERBOSE)
+
+#ifdef REG_DISCIPLINE
+
+
+#include <stk.h>
+/*
+ * Per-harness discipline record, compiled in only when REG_DISCIPLINE
+ * is defined. compf() and execf() cast the regdisc_t* they are handed
+ * back to Disc_t*, which relies on disc being the first member.
+ */
+typedef struct Disc_s
+{
+ regdisc_t disc; /* base discipline record (type supplied by the regex library) */
+ int ordinal; /* pre-incremented by compf() on every call */
+ Sfio_t* sp; /* stream execf() formats its trace into */
+} Disc_t;
+/*
+ * Discipline compile hook (presumably installed via the regdisc_t --
+ * the registration is outside this chunk). Increments the Disc_t
+ * ordinal and returns the new value encoded as a pointer offset from
+ * a null char*. re, xstr and xlen are not used.
+ */
+static void*
+compf(const regex_t* re, const char* xstr, size_t xlen, regdisc_t* disc)
+{
+ Disc_t* dp = (Disc_t*)disc;
+
+ return (void*)((char*)0 + ++dp->ordinal);
+}
+/*
+ * Discipline exec hook. Appends "{x}(o:n)" to dp->sp, where x is the
+ * first xlen bytes of xstr, o is data decoded as an offset from a null
+ * char* (the encoding compf() produces) and n is slen. Returns
+ * atoi(xstr), i.e. 0 unless xstr starts with a number. re, sstr and
+ * snxt are not used.
+ */
+static int
+execf(const regex_t* re, void* data, const char* xstr, size_t xlen, const char* sstr, size_t slen, char** snxt, regdisc_t* disc)
+{
+ Disc_t* dp = (Disc_t*)disc;
+
+ sfprintf(dp->sp, "{%-.*s}(%lu:%d)", xlen, xstr, (char*)data - (char*)0, slen);
+ return atoi(xstr);
+}
+/*
+ * Allocation hook backed by stkalloc(). A zero size returns a null
+ * pointer without touching anything; any other size is allocated on
+ * the stream passed as handle. data is not used.
+ */
+static void*
+resizef(void* handle, void* data, size_t size)
+{
+ if (!size)
+ return 0;
+ return stkalloc((Sfio_t*)handle, size);
+}
+
+#endif
+
+#ifndef NiL
+#ifdef __STDC__
+#define NiL 0
+#else
+#define NiL (char*)0
+#endif
+#endif
+
+#define H(x) do{if(html)fprintf(stderr,x);}while(0)
+#define T(x) fprintf(stderr,x)
+/*
+ * Write the testregex manual to standard error. Lines emitted with
+ * H() carry the HTML wrapper and are printed only when html is
+ * nonzero; lines emitted with T() are always printed.
+ */
+static void
+help(int html)
+{
+H("<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n");
+H("<HTML>\n");
+H("<HEAD>\n");
+H("<TITLE>testregex man document</TITLE>\n");
+H("</HEAD>\n");
+H("<BODY bgcolor=white>\n");
+H("<PRE>\n");
+T("NAME\n");
+T(" testregex - regex(3) test harness\n");
+T("\n");
+T("SYNOPSIS\n");
+T(" testregex [ options ]\n");
+T("\n");
+T("DESCRIPTION\n");
+T(" testregex reads regex(3) test specifications, one per line, from the\n");
+T(" standard input and writes one output line for each failed test. A\n");
+T(" summary line is written after all tests are done. Each successful\n");
+T(" test is run again with REG_NOSUB. Unsupported features are noted\n");
+T(" before the first test, and tests requiring these features are\n");
+T(" silently ignored.\n");
+T("\n");
+T("OPTIONS\n");
+T(" -c catch signals and non-terminating calls\n");
+T(" -e ignore error return mismatches\n");
+T(" -h list help on standard error\n");
+T(" -n do not repeat successful tests with regnexec()\n");
+T(" -o ignore match[] overrun errors\n");
+T(" -p ignore negative position mismatches\n");
+T(" -s use stack instead of malloc\n");
+T(" -x do not repeat successful tests with REG_NOSUB\n");
+T(" -v list each test line\n");
+T(" -A list failed test lines with actual answers\n");
+T(" -B list all test lines with actual answers\n");
+T(" -F list failed test lines\n");
+T(" -P list passed test lines\n");
+T(" -S output one summary line\n");
+T("\n");
+T("INPUT FORMAT\n");
+T(" Input lines may be blank, a comment beginning with #, or a test\n");
+T(" specification. A specification is five fields separated by one\n");
+T(" or more tabs. NULL denotes the empty string and NIL denotes the\n");
+T(" 0 pointer.\n");
+T("\n");
+T(" Field 1: the regex(3) flags to apply, one character per REG_feature\n");
+T(" flag. The test is skipped if REG_feature is not supported by the\n");
+T(" implementation. If the first character is not [BEASKLP] then the\n");
+T(" specification is a global control line. One or more of [BEASKLP] may be\n");
+T(" specified; the test will be repeated for each mode.\n");
+T("\n");
+T(" B basic BRE (grep, ed, sed)\n");
+T(" E REG_EXTENDED ERE (egrep)\n");
+T(" A REG_AUGMENTED ARE (egrep with negation)\n");
+T(" S REG_SHELL SRE (sh glob)\n");
+T(" K REG_SHELL|REG_AUGMENTED KRE (ksh glob)\n");
+T(" L REG_LITERAL LRE (fgrep)\n");
+T("\n");
+T(" a REG_LEFT|REG_RIGHT implicit ^...$\n");
+T(" b REG_NOTBOL lhs does not match ^\n");
+T(" c REG_COMMENT ignore space and #...\\n\n");
+T(" d REG_SHELL_DOT explicit leading . match\n");
+T(" e REG_NOTEOL rhs does not match $\n");
+T(" f REG_MULTIPLE multiple \\n separated patterns\n");
+T(" g FNM_LEADING_DIR testfnmatch only -- match until /\n");
+T(" h REG_MULTIREF multiple digit backref\n");
+T(" i REG_ICASE ignore case\n");
+T(" j REG_SPAN . matches \\n\n");
+T(" k REG_ESCAPE \\ to escape [...] delimiter\n");
+T(" l REG_LEFT implicit ^...\n");
+T(" m REG_MINIMAL minimal match\n");
+T(" n REG_NEWLINE explicit \\n match\n");
+T(" o REG_ENCLOSED (|&) magic inside [@|&](...)\n");
+T(" p REG_SHELL_PATH explicit / match\n");
+T(" q REG_DELIMITED delimited pattern\n");
+T(" r REG_RIGHT implicit ...$\n");
+T(" s REG_SHELL_ESCAPED \\ not special\n");
+T(" t REG_MUSTDELIM all delimiters must be specified\n");
+T(" u standard unspecified behavior -- errors not counted\n");
+T(" v REG_CLASS_ESCAPE \\ special inside [...]\n");
+T(" w REG_NOSUB no subexpression match array\n");
+T(" x REG_LENIENT let some errors slide\n");
+T(" y REG_LEFT regexec() implicit ^...\n");
+T(" z REG_NULL NULL subexpressions ok\n");
+T(" $ expand C \\c escapes in fields 2 and 3\n");
+T(" / field 2 is a regsubcomp() expression\n");
+T(" = field 3 is a regdecomp() expression\n");
+T("\n");
+T(" Field 1 control lines:\n");
+T("\n");
+T(" C set LC_COLLATE and LC_CTYPE to locale in field 2\n");
+T("\n");
+T(" ?test ... output field 5 if passed and != EXPECTED, silent otherwise\n");
+T(" &test ... output field 5 if current and previous passed\n");
+T(" |test ... output field 5 if current passed and previous failed\n");
+T(" ; ... output field 2 if previous failed\n");
+T(" {test ... skip if failed until }\n");
+T(" } end of skip\n");
+T("\n");
+T(" : comment comment copied as output NOTE\n");
+T(" :comment:test :comment: ignored\n");
+T(" N[OTE] comment comment copied as output NOTE\n");
+T(" T[EST] comment comment\n");
+T("\n");
+T(" number use number for nmatch (20 by default)\n");
+T("\n");
+T(" Field 2: the regular expression pattern; SAME uses the pattern from\n");
+T(" the previous specification. RE_DUP_MAX inside {...} expands to the\n");
+T(" value from <limits.h>.\n");
+T("\n");
+T(" Field 3: the string to match. X...{RE_DUP_MAX} expands to RE_DUP_MAX\n");
+T(" copies of X.\n");
+T("\n");
+T(" Field 4: the test outcome. This is either one of the posix error\n");
+T(" codes (with REG_ omitted) or the match array, a list of (m,n)\n");
+T(" entries with m and n being first and last+1 positions in the\n");
+T(" field 3 string, or NULL if REG_NOSUB is in effect and success\n");
+T(" is expected. BADPAT is acceptable in place of any regcomp(3)\n");
+T(" error code. The match[] array is initialized to (-2,-2) before\n");
+T(" each test. All array elements from 0 to nmatch-1 must be specified\n");
+T(" in the outcome. Unspecified endpoints (offset -1) are denoted by ?.\n");
+T(" Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a\n");
+T(" matched (?{...}) expression, where x is the text enclosed by {...},\n");
+T(" o is the expression ordinal counting from 1, and n is the length of\n");
+T(" the unmatched portion of the subject string. If x starts with a\n");
+T(" number then that is the return value of re_execf(), otherwise 0 is\n");
+T(" returned. RE_DUP_MAX[-+]N expands to the <limits.h> value -+N.\n");
+T("\n");
+T(" Field 5: optional comment appended to the report.\n");
+T("\n");
+T("CAVEAT\n");
+T(" If a regex implementation misbehaves with memory then all bets are off.\n");
+T("\n");
+T("CONTRIBUTORS\n");
+T(" Glenn Fowler gsf@research.att.com (ksh strmatch, regex extensions)\n");
+T(" David Korn dgk@research.att.com (ksh glob matcher)\n");
+T(" Doug McIlroy mcilroy@dartmouth.edu (ast regex/testre in C++)\n");
+T(" Tom Lord lord@regexps.com (rx tests)\n");
+T(" Henry Spencer henry@zoo.toronto.edu (original public regex)\n");
+T(" Andrew Hume andrew@research.att.com (gre tests)\n");
+T(" John Maddock John_Maddock@compuserve.com (regex++ tests)\n");
+T(" Philip Hazel ph10@cam.ac.uk (pcre tests)\n");
+T(" Ville Laurikari vl@iki.fi (libtre tests)\n");
+H("</PRE>\n");
+H("</BODY>\n");
+H("</HTML>\n");
+}
+
+#ifndef elementsof
+#define elementsof(x) (sizeof(x)/sizeof(x[0]))
+#endif
+
+#ifndef streq
+#define streq(a,b) (*(a)==*(b)&&!strcmp(a,b))
+#endif
+
+#define HUNG 2
+#define NOTEST (~0)
+
+#ifndef REG_TEST_DEFAULT
+#define REG_TEST_DEFAULT 0
+#endif
+
+#ifndef REG_EXEC_DEFAULT
+#define REG_EXEC_DEFAULT 0
+#endif
+/*
+ * Labels for the features this build cannot exercise, ended by a 0
+ * entry. "BASIC" is always present; every other label is compiled in
+ * only when its REG_* flag is not defined, when REG_NOSUB is part of
+ * REG_TEST_DEFAULT ("SUBMATCH"), or when the matching _REG_* feature
+ * macro is zero or undefined (the three function names at the end).
+ */
+static const char* unsupported[] =
+{
+ "BASIC",
+#ifndef REG_EXTENDED
+ "EXTENDED",
+#endif
+#ifndef REG_AUGMENTED
+ "AUGMENTED",
+#endif
+#ifndef REG_SHELL
+ "SHELL",
+#endif
+
+#ifndef REG_CLASS_ESCAPE
+ "CLASS_ESCAPE",
+#endif
+#ifndef REG_COMMENT
+ "COMMENT",
+#endif
+#ifndef REG_DELIMITED
+ "DELIMITED",
+#endif
+#ifndef REG_DISCIPLINE
+ "DISCIPLINE",
+#endif
+#ifndef REG_ESCAPE
+ "ESCAPE",
+#endif
+#ifndef REG_ICASE
+ "ICASE",
+#endif
+#ifndef REG_LEFT
+ "LEFT",
+#endif
+#ifndef REG_LENIENT
+ "LENIENT",
+#endif
+#ifndef REG_LITERAL
+ "LITERAL",
+#endif
+#ifndef REG_MINIMAL
+ "MINIMAL",
+#endif
+#ifndef REG_MULTIPLE
+ "MULTIPLE",
+#endif
+#ifndef REG_MULTIREF
+ "MULTIREF",
+#endif
+#ifndef REG_MUSTDELIM
+ "MUSTDELIM",
+#endif
+#ifndef REG_NEWLINE
+ "NEWLINE",
+#endif
+#ifndef REG_NOTBOL
+ "NOTBOL",
+#endif
+#ifndef REG_NOTEOL
+ "NOTEOL",
+#endif
+#ifndef REG_NULL
+ "NULL",
+#endif
+#ifndef REG_RIGHT
+ "RIGHT",
+#endif
+#ifndef REG_SHELL_DOT
+ "SHELL_DOT",
+#endif
+#ifndef REG_SHELL_ESCAPED
+ "SHELL_ESCAPED",
+#endif
+#ifndef REG_SHELL_GROUP
+ "SHELL_GROUP",
+#endif
+#ifndef REG_SHELL_PATH
+ "SHELL_PATH",
+#endif
+#ifndef REG_SPAN
+ "SPAN",
+#endif
+#if REG_NOSUB & REG_TEST_DEFAULT
+ "SUBMATCH",
+#endif
+#if !_REG_nexec
+ "regnexec",
+#endif
+#if !_REG_subcomp
+ "regsubcomp",
+#endif
+#if !_REG_decomp
+ "regdecomp", /* was misspelled "redecomp"; the function is regdecomp() */
+#endif
+ 0
+};
+
+#ifndef REG_CLASS_ESCAPE
+#define REG_CLASS_ESCAPE NOTEST
+#endif
+#ifndef REG_COMMENT
+#define REG_COMMENT NOTEST
+#endif
+#ifndef REG_DELIMITED
+#define REG_DELIMITED NOTEST
+#endif
+#ifndef REG_ESCAPE
+#define REG_ESCAPE NOTEST
+#endif
+#ifndef REG_ICASE
+#define REG_ICASE NOTEST
+#endif
+#ifndef REG_LEFT
+#define REG_LEFT NOTEST
+#endif
+#ifndef REG_LENIENT
+#define REG_LENIENT 0
+#endif
+#ifndef REG_MINIMAL
+#define REG_MINIMAL NOTEST
+#endif
+#ifndef REG_MULTIPLE
+#define REG_MULTIPLE NOTEST
+#endif
+#ifndef REG_MULTIREF
+#define REG_MULTIREF NOTEST
+#endif
+#ifndef REG_MUSTDELIM
+#define REG_MUSTDELIM NOTEST
+#endif
+#ifndef REG_NEWLINE
+#define REG_NEWLINE NOTEST
+#endif
+#ifndef REG_NOTBOL
+#define REG_NOTBOL NOTEST
+#endif
+#ifndef REG_NOTEOL
+#define REG_NOTEOL NOTEST
+#endif
+#ifndef REG_NULL
+#define REG_NULL NOTEST
+#endif
+#ifndef REG_RIGHT
+#define REG_RIGHT NOTEST
+#endif
+#ifndef REG_SHELL_DOT
+#define REG_SHELL_DOT NOTEST
+#endif
+#ifndef REG_SHELL_ESCAPED
+#define REG_SHELL_ESCAPED NOTEST
+#endif
+#ifndef REG_SHELL_GROUP
+#define REG_SHELL_GROUP NOTEST
+#endif
+#ifndef REG_SHELL_PATH
+#define REG_SHELL_PATH NOTEST
+#endif
+#ifndef REG_SPAN
+#define REG_SPAN NOTEST
+#endif
+
+#define REG_UNKNOWN (-1)
+
+#ifndef REG_ENEWLINE
+#define REG_ENEWLINE (REG_UNKNOWN-1)
+#endif
+#ifndef REG_ENULL
+#ifndef REG_EMPTY
+#define REG_ENULL (REG_UNKNOWN-2)
+#else
+#define REG_ENULL REG_EMPTY
+#endif
+#endif
+#ifndef REG_ECOUNT
+#define REG_ECOUNT (REG_UNKNOWN-3)
+#endif
+#ifndef REG_BADESC
+#define REG_BADESC (REG_UNKNOWN-4)
+#endif
+#ifndef REG_EMEM
+#define REG_EMEM (REG_UNKNOWN-5)
+#endif
+#ifndef REG_EHUNG
+#define REG_EHUNG (REG_UNKNOWN-6)
+#endif
+#ifndef REG_EBUS
+#define REG_EBUS (REG_UNKNOWN-7)
+#endif
+#ifndef REG_EFAULT
+#define REG_EFAULT (REG_UNKNOWN-8)
+#endif
+#ifndef REG_EFLAGS
+#define REG_EFLAGS (REG_UNKNOWN-9)
+#endif
+#ifndef REG_EDELIM
+#define REG_EDELIM (REG_UNKNOWN-10) /* own fallback slot; (REG_UNKNOWN-9) is already the REG_EFLAGS fallback */
+#endif
+/*
+ * Error code / name pairs. The names are the REG_* identifiers with
+ * the REG_ prefix dropped, the spelling the help text prescribes for
+ * the outcome field of a test specification. Codes the platform does
+ * not define fall back to the negative placeholders defined above.
+ */
+static const struct { int code; char* name; } codes[] =
+{
+ REG_UNKNOWN, "UNKNOWN",
+ REG_NOMATCH, "NOMATCH",
+ REG_BADPAT, "BADPAT",
+ REG_ECOLLATE, "ECOLLATE",
+ REG_ECTYPE, "ECTYPE",
+ REG_EESCAPE, "EESCAPE",
+ REG_ESUBREG, "ESUBREG",
+ REG_EBRACK, "EBRACK",
+ REG_EPAREN, "EPAREN",
+ REG_EBRACE, "EBRACE",
+ REG_BADBR, "BADBR",
+ REG_ERANGE, "ERANGE",
+ REG_ESPACE, "ESPACE",
+ REG_BADRPT, "BADRPT",
+ REG_ENEWLINE, "ENEWLINE",
+ REG_ENULL, "ENULL",
+ REG_ECOUNT, "ECOUNT",
+ REG_BADESC, "BADESC",
+ REG_EMEM, "EMEM",
+ REG_EHUNG, "EHUNG",
+ REG_EBUS, "EBUS",
+ REG_EFAULT, "EFAULT",
+ REG_EFLAGS, "EFLAGS",
+ REG_EDELIM, "EDELIM",
+};
+/*
+ * Harness-wide counters and context shared by the reporting, matching
+ * and signal-handling routines in this file.
+ */
+static struct
+{
+ regmatch_t NOMATCH; /* matchcheck() compares match[nmatch].rm_so with this to detect overruns */
+ int errors; /* bumped by report() and extract() for failures that are not "unspecified" */
+ int extracted; /* set to 1 by extract() in the listing and verify modes */
+ int ignored; /* bumped by matchcheck() when TEST_IGNORE_POSITION excuses a mismatch */
+ int lineno; /* physical input lines read so far; advanced by getline() */
+ int passed; /* bumped by extract() for TEST_OK results */
+ int signals; /* bumped by gotcha() for every signal caught */
+ int unspecified; /* failures flagged TEST_UNSPECIFIED */
+ int verify; /* not referenced in this part of the file */
+ int warnings; /* not referenced in this part of the file */
+ char* file; /* when set, report() prefixes messages with "file:" */
+ char* stack; /* not referenced in this part of the file */
+ char* which; /* when set, report() prints it after the pattern/subject */
+ jmp_buf gotcha; /* setjmp() target in catchfree(); gotcha() longjmp()s to it */
+#ifdef REG_DISCIPLINE
+ Disc_t disc; /* discipline record; matchcheck() reads disc.sp */
+#endif
+} state;
+/*
+ * Print s on standard output in test-file notation.
+ *
+ * A null pointer prints as NIL and an empty string (with len <= 1) as
+ * NULL. With TEST_EXPAND set in test, len bytes (strlen(s) when len
+ * is negative) are printed with C escapes for backslash, control
+ * characters and unprintable bytes, wrapped in double quotes when
+ * TEST_DELIMIT is also set. Otherwise s is printed verbatim with %s.
+ */
+static void
+quote(char* s, int len, unsigned long test)
+{
+ unsigned char* u = (unsigned char*)s;
+ unsigned char* e;
+ int c;
+#ifdef MB_CUR_MAX
+ int w;
+#endif
+
+ if (!u)
+ printf("NIL");
+ else if (!*u && len <= 1)
+ printf("NULL");
+ else if (test & TEST_EXPAND)
+ {
+ if (len < 0)
+ len = strlen((char*)u);
+ e = u + len;
+ if (test & TEST_DELIMIT)
+ printf("\"");
+ while (u < e)
+ switch (c = *u++)
+ {
+ case '\\':
+ printf("\\\\");
+ break;
+ case '"':
+ if (test & TEST_DELIMIT) /* only needs escaping inside the quoted form */
+ printf("\\\"");
+ else
+ printf("\"");
+ break;
+ case '\a':
+ printf("\\a");
+ break;
+ case '\b':
+ printf("\\b");
+ break;
+ case 033: /* ESC */
+ printf("\\e");
+ break;
+ case '\f':
+ printf("\\f");
+ break;
+ case '\n':
+ printf("\\n");
+ break;
+ case '\r':
+ printf("\\r");
+ break;
+ case '\t':
+ printf("\\t");
+ break;
+ case '\v':
+ printf("\\v");
+ break;
+ default:
+#ifdef MB_CUR_MAX
+ s = (char*)u - 1; /* back up to the byte just fetched into c */
+ if ((w = mblen(s, (char*)e - s)) > 1)
+ {
+ u += w - 1; /* skip the rest of the multibyte character */
+ fwrite(s, 1, w, stdout);
+ }
+ else
+#endif
+ if (!iscntrl(c) && isprint(c))
+ putchar(c);
+ else
+ printf("\\x%02x", c);
+ break;
+ }
+ if (test & TEST_DELIMIT)
+ printf("\"");
+ }
+ else
+ printf("%s", s);
+}
+/*
+ * Start a diagnostic line on standard output and count the failure.
+ *
+ * Prints "[file:]lineno:", then the quoted pattern re and, if given,
+ * " versus " the quoted subject s. The failure is counted in
+ * state.unspecified when TEST_UNSPECIFIED is set, else in
+ * state.errors. state.which, " NOSUB" (when flags has REG_NOSUB) and
+ * fun follow when applicable. A comment ending in '\n' completes the
+ * line; otherwise "comment: " and, if msg is set, "msg: " are printed
+ * and the line is left open for the caller to finish.
+ *
+ * NOTE(review): comment[strlen(comment)-1] is read unconditionally,
+ * so comment is assumed to be a non-empty string.
+ */
+static void
+report(char* comment, char* fun, char* re, char* s, int len, char* msg, int flags, unsigned long test)
+{
+ if (state.file)
+ printf("%s:", state.file);
+ printf("%d:", state.lineno);
+ if (re)
+ {
+ printf(" ");
+ quote(re, -1, test|TEST_DELIMIT);
+ if (s)
+ {
+ printf(" versus ");
+ quote(s, len, test|TEST_DELIMIT);
+ }
+ }
+ if (test & TEST_UNSPECIFIED)
+ {
+ state.unspecified++;
+ printf(" unspecified behavior");
+ }
+ else
+ state.errors++;
+ if (state.which)
+ printf(" %s", state.which);
+ if (flags & REG_NOSUB)
+ printf(" NOSUB");
+ if (fun)
+ printf(" %s", fun);
+ if (comment[strlen(comment)-1] == '\n')
+ printf(" %s", comment);
+ else
+ {
+ printf(" %s: ", comment);
+ if (msg)
+ printf("%s: ", msg);
+ }
+}
+/*
+ * Print the message for code followed by a newline. The harness's
+ * own REG_EBUS, REG_EFAULT and REG_EHUNG codes have fixed texts; any
+ * other code is rendered by regerror() for preg into a 256-byte
+ * buffer.
+ */
+static void
+error(regex_t* preg, int code)
+{
+ char* msg;
+ char buf[256];
+
+ switch (code)
+ {
+ case REG_EBUS:
+ msg = "bus error";
+ break;
+ case REG_EFAULT:
+ msg = "memory fault";
+ break;
+ case REG_EHUNG:
+ msg = "did not terminate";
+ break;
+ default:
+ regerror(code, preg, msg = buf, sizeof buf);
+ break;
+ }
+ printf("%s\n", msg);
+}
+/*
+ * Report a malformed test specification and terminate: prints
+ * "bad test case ", hands the details to report() (with no function
+ * name, no message and flags 0) and exits with status 1.
+ */
+static void
+bad(char* comment, char* re, char* s, int len, unsigned long test)
+{
+ printf("bad test case ");
+ report(comment, NiL, re, s, len, NiL, 0, test);
+ exit(1);
+}
+/*
+ * Expand C-style backslash escapes in s in place and return the
+ * length of the rewritten string (the terminating 0 is copied but not
+ * counted).
+ *
+ * Recognized after a backslash: \\ \a \b \cX (X & 037) \e \E \f \n \r
+ * \s (space) \t \v, hex as \x or \u with the digits optionally
+ * wrapped in {...} or [...], and octal of up to three digits. Any
+ * other escape is reported through bad(), which exits.
+ */
+static int
+escape(char* s)
+{
+ char* b;
+ char* t;
+ char* q;
+ char* e;
+ int c;
+
+ for (b = t = s; *t = *s; s++, t++) /* s reads, t writes; t never gets ahead of s */
+ if (*s == '\\')
+ switch (*++s)
+ {
+ case '\\':
+ break;
+ case 'a':
+ *t = '\a';
+ break;
+ case 'b':
+ *t = '\b';
+ break;
+ case 'c':
+ if (*t = *++s)
+ *t &= 037;
+ else
+ s--; /* "\c" at end of string: step back so the loop sees the 0 */
+ break;
+ case 'e':
+ case 'E':
+ *t = 033;
+ break;
+ case 'f':
+ *t = '\f';
+ break;
+ case 'n':
+ *t = '\n';
+ break;
+ case 'r':
+ *t = '\r';
+ break;
+ case 's':
+ *t = ' ';
+ break;
+ case 't':
+ *t = '\t';
+ break;
+ case 'v':
+ *t = '\v';
+ break;
+ case 'u':
+ case 'x':
+ c = 0;
+ q = c == 'u' ? (s + 5) : (char*)0; /* NOTE(review): c was just set to 0, so this test is always false and q is always null; \u is therefore not length-limited and behaves like \x */
+ e = s + 1; /* the only position where an opening { or [ is accepted */
+ while (!e || !q || s < q)
+ {
+ switch (*++s)
+ {
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ c = (c << 4) + *s - 'a' + 10;
+ continue;
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ c = (c << 4) + *s - 'A' + 10;
+ continue;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ c = (c << 4) + *s - '0';
+ continue;
+ case '{':
+ case '[':
+ if (s != e)
+ {
+ s--;
+ break;
+ }
+ e = 0; /* e == 0 marks the bracketed form */
+ continue;
+ case '}':
+ case ']':
+ if (e) /* not in bracketed form: the closer is ordinary text */
+ s--;
+ break;
+ default:
+ s--;
+ break;
+ }
+ break;
+ }
+ *t = c;
+ break;
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ c = *s - '0';
+ q = s + 2; /* at most two more octal digits */
+ while (s < q)
+ {
+ switch (*++s)
+ {
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ c = (c << 3) + *s - '0';
+ break;
+ default:
+ q = --s; /* not an octal digit: un-read it and stop */
+ break;
+ }
+ }
+ *t = c;
+ break;
+ default:
+ *(s + 1) = 0; /* cut the string after the offending character for the message */
+ bad("invalid C \\ escape\n", s - 1, NiL, 0, 0);
+ }
+ return t - b;
+}
+/*
+ * Print one match offset: -2 as "X", -1 as "?", anything else as a
+ * decimal number. The help text calls -2 an unset endpoint and -1 an
+ * unspecified one.
+ */
+static void
+matchoffprint(int off)
+{
+ switch (off)
+ {
+ case -2:
+ printf("X");
+ break;
+ case -1:
+ printf("?");
+ break;
+ default:
+ printf("%d", off);
+ break;
+ }
+}
+/*
+ * Print the match array as a run of "(so,eo)" pairs.
+ *
+ * Entries past index nsub are trimmed from the end while they are
+ * (-1,-1) or, when TEST_IGNORE_POSITION is set, have a negative
+ * endpoint. Unless TEST_ACTUAL or TEST_BASELINE is set, the pairs
+ * are followed by " expected: ans" (when ans is non-null) and a
+ * newline.
+ */
+static void
+matchprint(regmatch_t* match, int nmatch, int nsub, char* ans, unsigned long test)
+{
+ int i;
+
+ for (; nmatch > nsub + 1; nmatch--)
+ if ((match[nmatch-1].rm_so != -1 || match[nmatch-1].rm_eo != -1) && (!(test & TEST_IGNORE_POSITION) || match[nmatch-1].rm_so >= 0 && match[nmatch-1].rm_eo >= 0))
+ break;
+ for (i = 0; i < nmatch; i++)
+ {
+ printf("(");
+ matchoffprint(match[i].rm_so);
+ printf(",");
+ matchoffprint(match[i].rm_eo);
+ printf(")");
+ }
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE)))
+ {
+ if (ans)
+ printf(" expected: %s", ans);
+ printf("\n");
+ }
+}
+/*
+ * Compare the match array against the expected answer string ans.
+ *
+ * ans is a run of "(m,n)" pairs in which "?" stands for -1 and
+ * RE_DUP_MAX (optionally followed by +N or -N) for that limit; a '{'
+ * introduces a discipline trace that, under REG_DISCIPLINE, is
+ * compared with the text collected in state.disc.sp. Entries beyond
+ * those listed in ans must be (-1,-1). Unless TEST_IGNORE_OVER is
+ * set, match[nmatch] must still hold state.NOMATCH.rm_so, so match
+ * needs at least nmatch + 1 elements.
+ *
+ * Returns 1 when everything agrees and 0 on a mismatch; for the
+ * answer "OK" it returns test masked with
+ * TEST_BASELINE|TEST_PASS|TEST_VERIFY. Mismatches are reported on
+ * standard output only when none of the listing/query modes is
+ * active. A malformed ans ends the program through bad().
+ */
+static int
+matchcheck(regmatch_t* match, int nmatch, int nsub, char* ans, char* re, char* s, int len, int flags, unsigned long test)
+{
+ char* p;
+ int i;
+ int m;
+ int n;
+
+ if (streq(ans, "OK"))
+ return test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY);
+ for (i = 0, p = ans; i < nmatch && *p; i++)
+ {
+ if (*p == '{')
+ {
+#ifdef REG_DISCIPLINE
+ char* x;
+
+ if (!(x = sfstruse(state.disc.sp)))
+ bad("out of space [discipline string]\n", NiL, NiL, 0, 0);
+ if (strcmp(p, x))
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ return 0;
+ report("callout failed", NiL, re, s, len, NiL, flags, test);
+ quote(p, -1, test);
+ printf(" expected, ");
+ quote(x, -1, test);
+ printf(" returned\n"); /* NOTE(review): no return 0 here; control falls through to the break below */
+ }
+#endif
+ break;
+ }
+ if (*p++ != '(')
+ bad("improper answer\n", re, s, -1, test);
+ if (*p == '?')
+ {
+ m = -1;
+ p++;
+ }
+ else if (*p == 'R' && !memcmp(p, "RE_DUP_MAX", 10))
+ {
+ m = RE_DUP_MAX;
+ p += 10;
+ if (*p == '+' || *p == '-')
+ m += strtol(p, &p, 10);
+ }
+ else
+ m = strtol(p, &p, 10);
+ if (*p++ != ',')
+ bad("improper answer\n", re, s, -1, test);
+ if (*p == '?')
+ {
+ n = -1;
+ p++;
+ }
+ else if (*p == 'R' && !memcmp(p, "RE_DUP_MAX", 10))
+ {
+ n = RE_DUP_MAX;
+ p += 10;
+ if (*p == '+' || *p == '-')
+ n += strtol(p, &p, 10);
+ }
+ else
+ n = strtol(p, &p, 10);
+ if (*p++ != ')')
+ bad("improper answer\n", re, s, -1, test);
+ if (m!=match[i].rm_so || n!=match[i].rm_eo)
+ {
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)))
+ {
+ report("failed: match was", NiL, re, s, len, NiL, flags, test);
+ matchprint(match, nmatch, nsub, ans, test);
+ }
+ return 0;
+ }
+ }
+ for (; i < nmatch; i++) /* entries not listed in ans must be (-1,-1) */
+ {
+ if (match[i].rm_so!=-1 || match[i].rm_eo!=-1)
+ {
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_VERIFY)))
+ {
+ if ((test & TEST_IGNORE_POSITION) && (match[i].rm_so<0 || match[i].rm_eo<0))
+ {
+ state.ignored++;
+ return 0;
+ }
+ if (!(test & TEST_SUMMARY))
+ {
+ report("failed: match was", NiL, re, s, len, NiL, flags, test);
+ matchprint(match, nmatch, nsub, ans, test);
+ }
+ }
+ return 0;
+ }
+ }
+ if (!(test & TEST_IGNORE_OVER) && match[nmatch].rm_so != state.NOMATCH.rm_so) /* guard slot one past the last entry the library may write */
+ {
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)))
+ {
+ report("failed: overran match array", NiL, re, s, len, NiL, flags, test);
+ matchprint(match, nmatch + 1, nsub, NiL, test);
+ }
+ return 0;
+ }
+ return 1;
+}
+/*
+ * Unblock signal s, or every signal when s is 0. Uses sigprocmask()
+ * when SIG_SETMASK is defined, else sigsetmask() when the sigmask
+ * macro is defined; with neither available it does nothing.
+ */
+static void
+sigunblock(int s)
+{
+#ifdef SIG_SETMASK
+ int op;
+ sigset_t mask;
+
+ sigemptyset(&mask);
+ if (s)
+ {
+ sigaddset(&mask, s);
+ op = SIG_UNBLOCK;
+ }
+ else op = SIG_SETMASK; /* installing the empty mask unblocks everything */
+ sigprocmask(op, &mask, NiL);
+#else
+#ifdef sigmask
+ sigsetmask(s ? (sigsetmask(0L) & ~sigmask(s)) : 0L);
+#endif
+#endif
+}
+/*
+ * Signal handler. Re-installs itself for sig, cancels any pending
+ * alarm, counts the signal in state.signals, unblocks sig and
+ * longjmp()s to state.gotcha with REG_EHUNG for SIGALRM, REG_EBUS
+ * for SIGBUS and REG_EFAULT for any other signal. Does not return.
+ */
+static void
+gotcha(int sig)
+{
+ int ret;
+
+ signal(sig, gotcha);
+ alarm(0);
+ state.signals++;
+ switch (sig)
+ {
+ case SIGALRM:
+ ret = REG_EHUNG;
+ break;
+ case SIGBUS:
+ ret = REG_EBUS;
+ break;
+ default:
+ ret = REG_EFAULT;
+ break;
+ }
+ sigunblock(sig); /* the handler is left via longjmp, so sig is unblocked explicitly first */
+ longjmp(state.gotcha, ret);
+}
+/*
+ * Read one logical line from fp into a static 32K buffer and return
+ * it, or return 0 when fgets() fails (end of file or error). The
+ * trailing newline is removed; a line ending in backslash-newline is
+ * joined with the next one, dropping both characters.
+ * state.lineno is advanced once per physical line.
+ *
+ * NOTE(review): this name may clash with the POSIX.1-2008 getline()
+ * prototype in <stdio.h> on some systems -- confirm against the
+ * target libc.
+ */
+static char*
+getline(FILE* fp)
+{
+ static char buf[32 * 1024];
+
+ register char* s = buf;
+ register char* e = &buf[sizeof(buf)];
+ register char* b;
+
+ for (;;)
+ {
+ if (!(b = fgets(s, e - s, fp)))
+ return 0;
+ state.lineno++;
+ s += strlen(s);
+ if (s == b || *--s != '\n' || s == b || *(s - 1) != '\\')
+ {
+ *s = 0; /* NOTE(review): when the text read does not end in '\n', s points at its last character here, so that character is overwritten */
+ break;
+ }
+ s--; /* back up onto the backslash so the next read overwrites it */
+ }
+ return buf;
+}
+/*
+ * Announce the start of a skipped range and return skip | level.
+ * The announcement, "NOTE\t[msg: ]skipping lines <lineno>" with no
+ * trailing newline, is printed only when skip is still 0 and none of
+ * TEST_ACTUAL, TEST_BASELINE, TEST_FAIL, TEST_PASS or TEST_SUMMARY
+ * is set.
+ */
+static unsigned long
+note(unsigned long level, char* msg, unsigned long skip, unsigned long test)
+{
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)) && !skip)
+ {
+ printf("NOTE\t");
+ if (msg)
+ printf("%s: ", msg);
+ printf("skipping lines %d", state.lineno);
+ }
+ return skip | level;
+}
+
+#define TABS(n) &ts[7-((n)&7)]
+
+static char ts[] = "\t\t\t\t\t\t\t";
+/*
+ * Book-keeping and output for the listing modes; returns the skip
+ * mask, possibly with level added.
+ *
+ * When any of TEST_ACTUAL, TEST_BASELINE, TEST_FAIL, TEST_OK,
+ * TEST_PASS or TEST_SUMMARY is set, the result is counted
+ * (state.passed for TEST_OK, otherwise state.unspecified or
+ * state.errors) and, unless TEST_PASS or TEST_SUMMARY ends up set,
+ * the test is echoed in specification format: spec, pattern (or
+ * SAME), subject, answer and optional msg, separated by the tab runs
+ * selected from the tabs array. The answer column is ans, accept or
+ * the actual match array depending on the mode. Otherwise
+ * TEST_QUERY starts a skip through note() and TEST_VERIFY only sets
+ * state.extracted.
+ */
+static unsigned long
+extract(int* tabs, char* spec, char* re, char* s, char* ans, char* msg, char* accept, regmatch_t* match, int nmatch, int nsub, unsigned long skip, unsigned long level, unsigned long test)
+{
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_OK|TEST_PASS|TEST_SUMMARY))
+ {
+ state.extracted = 1;
+ if (test & TEST_OK)
+ {
+ state.passed++;
+ if ((test & TEST_VERIFY) && !(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+ {
+ if (msg && strcmp(msg, "EXPECTED"))
+ printf("NOTE\t%s\n", msg);
+ return skip;
+ }
+ test &= ~(TEST_PASS|TEST_QUERY);
+ }
+ if (test & (TEST_QUERY|TEST_VERIFY))
+ {
+ if (test & TEST_BASELINE)
+ test &= ~(TEST_BASELINE|TEST_PASS);
+ else
+ test |= TEST_PASS;
+ skip |= level;
+ }
+ if (!(test & TEST_OK))
+ {
+ if (test & TEST_UNSPECIFIED)
+ state.unspecified++;
+ else
+ state.errors++;
+ }
+ if (test & (TEST_PASS|TEST_SUMMARY))
+ return skip;
+ test &= ~TEST_DELIMIT;
+ printf("%s%s", spec, TABS(*tabs++)); /* *tabs++ reads the current entry and steps to the next one */
+ if ((test & (TEST_BASELINE|TEST_SAME)) == (TEST_BASELINE|TEST_SAME))
+ printf("SAME");
+ else
+ quote(re, -1, test);
+ printf("%s", TABS(*tabs++));
+ quote(s, -1, test);
+ printf("%s", TABS(*tabs++));
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE)) || !accept && !match)
+ printf("%s", ans);
+ else if (accept)
+ printf("%s", accept);
+ else
+ matchprint(match, nmatch, nsub, NiL, test);
+ if (msg)
+ printf("%s%s", TABS(*tabs++), msg);
+ putchar('\n');
+ }
+ else if (test & TEST_QUERY)
+ skip = note(level, msg, skip, test);
+ else if (test & TEST_VERIFY)
+ state.extracted = 1;
+ return skip;
+}
+
+/*
+ * Call regfree(preg), guarded by the signal catching machinery when
+ * TEST_CATCH is set in test.
+ *
+ * Without TEST_CATCH regfree() is called directly. With it, a
+ * setjmp() point is saved in state.gotcha and an alarm of HUNG seconds
+ * is armed around the call. If control comes back through that
+ * setjmp() with a non-zero value, the failure is either handed to
+ * extract() (when one of the listing/query bits is set) or reported
+ * with report() and error().
+ *
+ * Returns 0 when regfree() completed, otherwise the non-zero value
+ * delivered to setjmp().
+ */
+static int
+catchfree(regex_t* preg, int flags, int* tabs, char* spec, char* re, char* s, char* ans, char* msg, char* accept, regmatch_t* match, int nmatch, int nsub, unsigned long skip, unsigned long level, unsigned long test)
+{
+	int	status;
+
+	if (!(test & TEST_CATCH))
+	{
+		regfree(preg);
+		return 0;
+	}
+	if (!(status = setjmp(state.gotcha)))
+	{
+		alarm(HUNG);
+		regfree(preg);
+		alarm(0);
+		return 0;
+	}
+	/* reached only through longjmp(): regfree() did not complete */
+	if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+		extract(tabs, spec, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
+	else
+	{
+		report("failed", "regfree", re, NiL, -1, msg, flags, test);
+		error(preg, status);
+	}
+	return status;
+}
+
+/*
+ * Expand the first RE_DUP_MAX reference found at brace depth 1 in os.
+ *
+ * If os contains no such reference it is returned unchanged and ot is
+ * not touched. Otherwise the expanded text is built in ot and ot is
+ * returned:
+ *
+ *   - the name, with an optional +N or -N offset, is replaced by the
+ *     resulting decimal number;
+ *   - when the name is directly preceded by "c...{" (any character c,
+ *     three dots and the opening brace), those five characters and the
+ *     rest of the brace expression are replaced by that many copies of
+ *     c.
+ *
+ * The text after the reference is copied verbatim.
+ */
+static char*
+expand(char* os, char* ot)
+{
+	char*	in = os;
+	char*	src;
+	char*	out;
+	int	depth = 0;
+	int	fill;
+	int	c;
+	long	count;
+
+	while ((c = *in++))
+	{
+		if (c == '{')
+		{
+			depth++;
+			continue;
+		}
+		if (c == '}')
+		{
+			depth--;
+			continue;
+		}
+		if (c != 'R' || depth != 1 || memcmp(in, "E_DUP_MAX", 9))
+			continue;
+
+		/* copy everything in front of the 'R' */
+		in--;
+		out = ot;
+		for (src = os; src < in; src++)
+			*out++ = *src;
+		fill = 0;
+		if ((out - ot) >= 5 && out[-1] == '{' && out[-2] == '.' && out[-3] == '.' && out[-4] == '.')
+			fill = out[-5];
+
+		/* skip the 10 character name and apply an optional signed offset */
+		count = RE_DUP_MAX;
+		in += 10;
+		if (*in == '+' || *in == '-')
+			count += strtol(in, &in, 10);
+		if (fill)
+		{
+			/* drop "c...{" and emit count copies of c, then skip past '}' */
+			out -= 5;
+			while (count-- > 0)
+				*out++ = fill;
+			while (*in && *in++ != '}');
+		}
+		else
+			out += snprintf(out, 32, "%ld", count);
+		while ((*out = *in++))
+			out++;
+		return ot;
+	}
+	return os;
+}
+
+/*
+ * Test driver entry point.
+ *
+ * Processes the leading -option arguments, then reads every named test
+ * file (standard input when no file is given or the name is "-").
+ * Each test line is split into tab separated fields: spec, pattern,
+ * subject string, expected answer and an optional message. The spec
+ * selects the regex flavors and flags; for each selected flavor the
+ * pattern is compiled with regcomp(), executed with regexec() (and the
+ * optional regnexec/regsubcomp/regdecomp variants when available) and
+ * the outcome is compared with the expected answer. A per-file
+ * summary is printed after the last line.
+ *
+ * Returns 0 after all files were processed, 2 for help requests,
+ * invalid options and files that cannot be opened.
+ */
+int
+main(int argc, char** argv)
+{
+ int flags;
+ int cflags;
+ int eflags;
+ int nmatch;
+ int nexec;
+ int nstr;
+ int cret;
+ int eret;
+ int nsub;
+ int i;
+ int j;
+ int expected;
+ int got;
+ int locale;
+ int subunitlen;
+ int testno;
+ unsigned long level;
+ unsigned long skip;
+ char* p;
+ char* line;
+ char* spec;
+ char* re;
+ char* s;
+ char* ans;
+ char* msg;
+ char* fun;
+ char* ppat;
+ char* subunit;
+ char* version;
+ char* field[6];
+ char* delim[6];
+ FILE* fp;
+ int tabs[6];
+ char unit[64];
+ regmatch_t match[100];
+ regex_t preg;
+
+ static char pat[32 * 1024];
+ static char patbuf[32 * 1024];
+ static char strbuf[32 * 1024];
+
+ int nonosub = REG_NOSUB == 0;
+ int nonexec = 0;
+
+ unsigned long test = 0;
+
+ static char* filter[] = { "-", 0 };
+
+ /* sentinel used to pre-fill match[] so untouched slots can be detected */
+ state.NOMATCH.rm_so = state.NOMATCH.rm_eo = -2;
+ /* unit name: the text of id from offset 10 up to the first white space */
+ p = unit;
+ version = (char*)id + 10;
+ while (p < &unit[sizeof(unit)-1] && (*p = *version++) && !isspace(*p))
+ p++;
+ *p = 0;
+ /* command line options; several letters may be combined in one argument */
+ while ((p = *++argv) && *p == '-')
+ for (;;)
+ {
+ switch (*++p)
+ {
+ case 0:
+ break;
+ case 'c':
+ test |= TEST_CATCH;
+ continue;
+ case 'e':
+ test |= TEST_IGNORE_ERROR;
+ continue;
+ case 'h':
+ case '?':
+ help(0);
+ return 2;
+ case '-':
+ help(p[1] == 'h');
+ return 2;
+ case 'n':
+ nonexec = 1;
+ continue;
+ case 'o':
+ test |= TEST_IGNORE_OVER;
+ continue;
+ case 'p':
+ test |= TEST_IGNORE_POSITION;
+ continue;
+ case 's':
+#ifdef REG_DISCIPLINE
+ if (!(state.stack = stkalloc(stkstd, 0)))
+ fprintf(stderr, "%s: out of space [stack]", unit);
+ state.disc.disc.re_resizef = resizef;
+ state.disc.disc.re_resizehandle = (void*)stkstd;
+#endif
+ continue;
+ case 'x':
+ nonosub = 1;
+ continue;
+ case 'v':
+ test |= TEST_VERBOSE;
+ continue;
+ case 'A':
+ test |= TEST_ACTUAL;
+ continue;
+ case 'B':
+ test |= TEST_BASELINE;
+ continue;
+ case 'F':
+ test |= TEST_FAIL;
+ continue;
+ case 'P':
+ test |= TEST_PASS;
+ continue;
+ case 'S':
+ test |= TEST_SUMMARY;
+ continue;
+ default:
+ fprintf(stderr, "%s: %c: invalid option\n", unit, *p);
+ return 2;
+ }
+ break;
+ }
+ /* no file operands: read the standard input */
+ if (!*argv)
+ argv = filter;
+ locale = 0;
+ while (state.file = *argv++)
+ {
+ if (streq(state.file, "-") || streq(state.file, "/dev/stdin") || streq(state.file, "/dev/fd/0"))
+ {
+ state.file = 0;
+ fp = stdin;
+ }
+ else if (!(fp = fopen(state.file, "r")))
+ {
+ fprintf(stderr, "%s: %s: cannot read\n", unit, state.file);
+ return 2;
+ }
+ /* per-file counters */
+ testno = state.errors = state.ignored = state.lineno = state.passed =
+ state.signals = state.unspecified = state.warnings = 0;
+ skip = 0;
+ level = 1;
+ /* normal report mode: print the TEST header and implementation notes */
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+ {
+ printf("TEST\t%s ", unit);
+ if (s = state.file)
+ {
+ /* subunit: file name without directory and without the last suffix */
+ subunit = p = 0;
+ for (;;)
+ {
+ switch (*s++)
+ {
+ case 0:
+ break;
+ case '/':
+ subunit = s;
+ continue;
+ case '.':
+ p = s - 1;
+ continue;
+ default:
+ continue;
+ }
+ break;
+ }
+ if (!subunit)
+ subunit = state.file;
+ if (p < subunit)
+ p = s - 1;
+ subunitlen = p - subunit;
+ printf("%-.*s ", subunitlen, subunit);
+ }
+ else
+ subunit = 0;
+ for (s = version; *s && (*s != ' ' || *(s + 1) != '$'); s++)
+ putchar(*s);
+ if (test & TEST_CATCH)
+ printf(", catch");
+ if (test & TEST_IGNORE_ERROR)
+ printf(", ignore error code mismatches");
+ if (test & TEST_IGNORE_POSITION)
+ printf(", ignore negative position mismatches");
+#ifdef REG_DISCIPLINE
+ if (state.stack)
+ printf(", stack");
+#endif
+ if (test & TEST_VERBOSE)
+ printf(", verbose");
+ printf("\n");
+#ifdef REG_VERSIONID
+ if (regerror(REG_VERSIONID, NiL, pat, sizeof(pat)) > 0)
+ s = pat;
+ else
+#endif
+#ifdef REG_TEST_VERSION
+ s = REG_TEST_VERSION;
+#else
+ s = "regex";
+#endif
+ printf("NOTE\t%s\n", s);
+ if (elementsof(unsupported) > 1)
+ {
+#if (REG_TEST_DEFAULT & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL)) || !defined(REG_EXTENDED)
+ i = 0;
+#else
+ i = REG_EXTENDED != 0;
+#endif
+ for (got = 0; i < elementsof(unsupported) - 1; i++)
+ {
+ if (!got)
+ {
+ got = 1;
+ printf("NOTE\tunsupported: %s", unsupported[i]);
+ }
+ else
+ printf(",%s", unsupported[i]);
+ }
+ if (got)
+ printf("\n");
+ }
+ }
+#ifdef REG_DISCIPLINE
+ state.disc.disc.re_version = REG_VERSION;
+ state.disc.disc.re_compf = compf;
+ state.disc.disc.re_execf = execf;
+ if (!(state.disc.sp = sfstropen()))
+ bad("out of space [discipline string stream]\n", NiL, NiL, 0, 0);
+ preg.re_disc = &state.disc.disc;
+#endif
+ if (test & TEST_CATCH)
+ {
+ signal(SIGALRM, gotcha);
+ signal(SIGBUS, gotcha);
+ signal(SIGSEGV, gotcha);
+ }
+ while (p = getline(fp))
+ {
+
+ /* parse: split the line into at most elementsof(field) tab separated fields */
+
+ line = p;
+ if (*p == ':' && !isspace(*(p + 1)))
+ {
+ while (*++p && *p != ':');
+ if (!*p++)
+ {
+ if (test & TEST_BASELINE)
+ printf("%s\n", line);
+ continue;
+ }
+ }
+ while (isspace(*p))
+ p++;
+ if (*p == 0 || *p == '#' || *p == 'T')
+ {
+ if (test & TEST_BASELINE)
+ printf("%s\n", line);
+ continue;
+ }
+ if (*p == ':' || *p == 'N')
+ {
+ if (test & TEST_BASELINE)
+ printf("%s\n", line);
+ else if (!(test & (TEST_ACTUAL|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+ {
+ while (*++p && !isspace(*p));
+ while (isspace(*p))
+ p++;
+ printf("NOTE %s\n", p);
+ }
+ continue;
+ }
+ j = 0;
+ i = 0;
+ field[i++] = p;
+ for (;;)
+ {
+ switch (*p++)
+ {
+ case 0:
+ p--;
+ j = 0;
+ goto checkfield;
+ case '\t':
+ *(delim[i] = p - 1) = 0;
+ j = 1;
+ checkfield:
+ /* "NIL" becomes a null field, "NULL" an empty string */
+ s = field[i - 1];
+ if (streq(s, "NIL"))
+ field[i - 1] = 0;
+ else if (streq(s, "NULL"))
+ *s = 0;
+ while (*p == '\t')
+ {
+ p++;
+ j++;
+ }
+ /* remember how many tabs followed the field, for extract() */
+ tabs[i - 1] = j;
+ if (!*p)
+ break;
+ if (i >= elementsof(field))
+ bad("too many fields\n", NiL, NiL, 0, 0);
+ field[i++] = p;
+ /*FALLTHROUGH*/
+ default:
+ continue;
+ }
+ break;
+ }
+ if (!(spec = field[0]))
+ bad("NIL spec\n", NiL, NiL, 0, 0);
+
+ /* interpret: translate the spec characters into flags and test bits */
+
+ cflags = REG_TEST_DEFAULT;
+ eflags = REG_EXEC_DEFAULT;
+ test &= TEST_GLOBAL;
+ state.extracted = 0;
+ nmatch = 20;
+ nsub = -1;
+ for (p = spec; *p; p++)
+ {
+ if (isdigit(*p))
+ {
+ nmatch = strtol(p, &p, 10);
+ if (nmatch >= elementsof(match))
+ bad("nmatch must be < 100\n", NiL, NiL, 0, 0);
+ p--;
+ continue;
+ }
+ switch (*p)
+ {
+ case 'A':
+ test |= TEST_ARE;
+ continue;
+ case 'B':
+ test |= TEST_BRE;
+ continue;
+ case 'C':
+ if (!(test & TEST_QUERY) && !(skip & level))
+ bad("locale must be nested\n", NiL, NiL, 0, 0);
+ test &= ~TEST_QUERY;
+ if (locale)
+ bad("locale nesting not supported\n", NiL, NiL, 0, 0);
+ if (i != 2)
+ bad("locale field expected\n", NiL, NiL, 0, 0);
+ if (!(skip & level))
+ {
+#if defined(LC_COLLATE) && defined(LC_CTYPE)
+ s = field[1];
+ if (!s || streq(s, "POSIX"))
+ s = "C";
+ if ((ans = setlocale(LC_COLLATE, s)) && streq(ans, "POSIX"))
+ ans = "C";
+ if (!ans || !streq(ans, s) && streq(s, "C"))
+ ans = 0;
+ else if ((ans = setlocale(LC_CTYPE, s)) && streq(ans, "POSIX"))
+ ans = "C";
+ if (!ans || !streq(ans, s) && streq(s, "C"))
+ skip = note(level, s, skip, test);
+ else
+ {
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+ printf("NOTE \"%s\" locale\n", s);
+ locale = level;
+ }
+#else
+ /* note() takes (level, msg, skip, test) in that order */
+ skip = note(level, "locales not supported", skip, test);
+#endif
+ }
+ cflags = NOTEST;
+ continue;
+ case 'E':
+ test |= TEST_ERE;
+ continue;
+ case 'K':
+ test |= TEST_KRE;
+ continue;
+ case 'L':
+ test |= TEST_LRE;
+ continue;
+ case 'S':
+ test |= TEST_SRE;
+ continue;
+
+ case 'a':
+ cflags |= REG_LEFT|REG_RIGHT;
+ continue;
+ case 'b':
+ eflags |= REG_NOTBOL;
+ continue;
+ case 'c':
+ cflags |= REG_COMMENT;
+ continue;
+ case 'd':
+ cflags |= REG_SHELL_DOT;
+ continue;
+ case 'e':
+ eflags |= REG_NOTEOL;
+ continue;
+ case 'f':
+ cflags |= REG_MULTIPLE;
+ continue;
+ case 'g':
+ cflags |= NOTEST;
+ continue;
+ case 'h':
+ cflags |= REG_MULTIREF;
+ continue;
+ case 'i':
+ cflags |= REG_ICASE;
+ continue;
+ case 'j':
+ cflags |= REG_SPAN;
+ continue;
+ case 'k':
+ cflags |= REG_ESCAPE;
+ continue;
+ case 'l':
+ cflags |= REG_LEFT;
+ continue;
+ case 'm':
+ cflags |= REG_MINIMAL;
+ continue;
+ case 'n':
+ cflags |= REG_NEWLINE;
+ continue;
+ case 'o':
+ cflags |= REG_SHELL_GROUP;
+ continue;
+ case 'p':
+ cflags |= REG_SHELL_PATH;
+ continue;
+ case 'q':
+ cflags |= REG_DELIMITED;
+ continue;
+ case 'r':
+ cflags |= REG_RIGHT;
+ continue;
+ case 's':
+ cflags |= REG_SHELL_ESCAPED;
+ continue;
+ case 't':
+ cflags |= REG_MUSTDELIM;
+ continue;
+ case 'u':
+ test |= TEST_UNSPECIFIED;
+ continue;
+ case 'v':
+ cflags |= REG_CLASS_ESCAPE;
+ continue;
+ case 'w':
+ cflags |= REG_NOSUB;
+ continue;
+ case 'x':
+ if (REG_LENIENT)
+ cflags |= REG_LENIENT;
+ else
+ test |= TEST_LENIENT;
+ continue;
+ case 'y':
+ eflags |= REG_LEFT;
+ continue;
+ case 'z':
+ cflags |= REG_NULL;
+ continue;
+
+ case '$':
+ test |= TEST_EXPAND;
+ continue;
+
+ case '/':
+ test |= TEST_SUB;
+ continue;
+
+ case '=':
+ test |= TEST_DECOMP;
+ continue;
+
+ case '?':
+ test |= TEST_VERIFY;
+ test &= ~(TEST_AND|TEST_OR);
+ state.verify = state.passed;
+ continue;
+ case '&':
+ test |= TEST_VERIFY|TEST_AND;
+ test &= ~TEST_OR;
+ continue;
+ case '|':
+ test |= TEST_VERIFY|TEST_OR;
+ test &= ~TEST_AND;
+ continue;
+ case ';':
+ test |= TEST_OR;
+ test &= ~TEST_AND;
+ continue;
+
+ case '{':
+ /* open a nested region; level is a one-bit mask for the nesting depth */
+ level <<= 1;
+ if (skip & (level >> 1))
+ {
+ skip |= level;
+ cflags = NOTEST;
+ }
+ else
+ {
+ skip &= ~level;
+ test |= TEST_QUERY;
+ }
+ continue;
+ case '}':
+ if (level == 1)
+ bad("invalid {...} nesting\n", NiL, NiL, 0, 0);
+ if ((skip & level) && !(skip & (level>>1)))
+ {
+ if (!(test & (TEST_BASELINE|TEST_SUMMARY)))
+ {
+ if (test & (TEST_ACTUAL|TEST_FAIL))
+ printf("}\n");
+ else if (!(test & TEST_PASS))
+ printf("-%d\n", state.lineno);
+ }
+ }
+#if defined(LC_COLLATE) && defined(LC_CTYPE)
+ else if (locale & level)
+ {
+ /* leaving the region that selected a locale: go back to "C" */
+ locale = 0;
+ if (!(skip & level))
+ {
+ s = "C";
+ setlocale(LC_COLLATE, s);
+ setlocale(LC_CTYPE, s);
+ if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+ printf("NOTE \"%s\" locale\n", s);
+ else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_PASS))
+ printf("}\n");
+ }
+ else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL))
+ printf("}\n");
+ }
+#endif
+ level >>= 1;
+ cflags = NOTEST;
+ continue;
+
+ default:
+ bad("bad spec\n", spec, NiL, 0, test);
+ break;
+
+ }
+ break;
+ }
+ if ((cflags|eflags) == NOTEST || (skip & level) && (test & TEST_BASELINE))
+ {
+ if (test & TEST_BASELINE)
+ {
+ /* restore the tabs that were overwritten while splitting */
+ while (i > 1)
+ *delim[--i] = '\t';
+ printf("%s\n", line);
+ }
+ continue;
+ }
+ if (test & TEST_OR)
+ {
+ if (!(test & TEST_VERIFY))
+ {
+ test &= ~TEST_OR;
+ if (state.passed == state.verify && i > 1)
+ printf("NOTE\t%s\n", field[1]);
+ continue;
+ }
+ else if (state.passed > state.verify)
+ continue;
+ }
+ else if (test & TEST_AND)
+ {
+ if (state.passed == state.verify)
+ continue;
+ state.passed = state.verify;
+ }
+ if (i < ((test & TEST_DECOMP) ? 3 : 4))
+ bad("too few fields\n", NiL, NiL, 0, test);
+ while (i < elementsof(field))
+ field[i++] = 0;
+ if (re = field[1])
+ {
+ /* "SAME" reuses the pattern of the previous test line */
+ if (streq(re, "SAME"))
+ {
+ re = ppat;
+ test |= TEST_SAME;
+ }
+ else
+ {
+ if (test & TEST_EXPAND)
+ escape(re);
+ re = expand(re, patbuf);
+ strcpy(ppat = pat, re);
+ }
+ }
+ else
+ ppat = 0;
+ nstr = -1;
+ if (s = field[2])
+ {
+ s = expand(s, strbuf);
+ if (test & TEST_EXPAND)
+ {
+ nstr = escape(s);
+#if _REG_nexec
+ if (nstr != strlen(s))
+ nexec = nstr;
+#endif
+ }
+ }
+ if (!(ans = field[(test & TEST_DECOMP) ? 2 : 3]))
+ bad("NIL answer\n", NiL, NiL, 0, test);
+ msg = field[4];
+ fflush(stdout);
+ if (test & TEST_SUB)
+#if _REG_subcomp
+ cflags |= REG_DELIMITED;
+#else
+ continue;
+#endif
+#if !_REG_decomp
+ if (test & TEST_DECOMP)
+ continue;
+#endif
+
+ /* each pass through compile: handles one flavor still pending in test */
+ compile:
+
+ if (state.extracted || (skip & level))
+ continue;
+#if !(REG_TEST_DEFAULT & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
+#ifdef REG_EXTENDED
+ if (REG_EXTENDED != 0 && (test & TEST_BRE))
+#else
+ if (test & TEST_BRE)
+#endif
+ {
+ test &= ~TEST_BRE;
+ flags = cflags;
+ state.which = "BRE";
+ }
+ else
+#endif
+#ifdef REG_EXTENDED
+ if (test & TEST_ERE)
+ {
+ test &= ~TEST_ERE;
+ flags = cflags | REG_EXTENDED;
+ state.which = "ERE";
+ }
+ else
+#endif
+#ifdef REG_AUGMENTED
+ if (test & TEST_ARE)
+ {
+ test &= ~TEST_ARE;
+ flags = cflags | REG_AUGMENTED;
+ state.which = "ARE";
+ }
+ else
+#endif
+#ifdef REG_LITERAL
+ if (test & TEST_LRE)
+ {
+ test &= ~TEST_LRE;
+ flags = cflags | REG_LITERAL;
+ state.which = "LRE";
+ }
+ else
+#endif
+#ifdef REG_SHELL
+ if (test & TEST_SRE)
+ {
+ test &= ~TEST_SRE;
+ flags = cflags | REG_SHELL;
+ state.which = "SRE";
+ }
+ else
+#ifdef REG_AUGMENTED
+ if (test & TEST_KRE)
+ {
+ test &= ~TEST_KRE;
+ flags = cflags | REG_SHELL | REG_AUGMENTED;
+ state.which = "KRE";
+ }
+ else
+#endif
+#endif
+ {
+ /* no flavor left for this line */
+ if (test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY))
+ extract(tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test|TEST_OK);
+ continue;
+ }
+ if ((test & (TEST_QUERY|TEST_VERBOSE|TEST_VERIFY)) == TEST_VERBOSE)
+ {
+ printf("test %-3d %s ", state.lineno, state.which);
+ quote(re, -1, test|TEST_DELIMIT);
+ printf(" ");
+ quote(s, nstr, test|TEST_DELIMIT);
+ printf("\n");
+ }
+
+ /* also re-entered with REG_NOSUB added after a successful match */
+ nosub:
+ fun = "regcomp";
+#if _REG_nexec
+ if (nstr >= 0 && nstr != strlen(s))
+ nexec = nstr;
+
+ else
+#endif
+ nexec = -1;
+ if (state.extracted || (skip & level))
+ continue;
+ if (!(test & TEST_QUERY))
+ testno++;
+#ifdef REG_DISCIPLINE
+ if (state.stack)
+ stkset(stkstd, state.stack, 0);
+ flags |= REG_DISCIPLINE;
+ state.disc.ordinal = 0;
+ sfstrseek(state.disc.sp, 0, SEEK_SET);
+#endif
+ /* with TEST_CATCH the call is guarded by setjmp() and an alarm */
+ if (!(test & TEST_CATCH))
+ cret = regcomp(&preg, re, flags);
+ else if (!(cret = setjmp(state.gotcha)))
+ {
+ alarm(HUNG);
+ cret = regcomp(&preg, re, flags);
+ alarm(0);
+ }
+#if _REG_subcomp
+ if (!cret && (test & TEST_SUB))
+ {
+ fun = "regsubcomp";
+ p = re + preg.re_npat;
+ if (!(test & TEST_CATCH))
+ cret = regsubcomp(&preg, p, NiL, 0, 0);
+ else if (!(cret = setjmp(state.gotcha)))
+ {
+ alarm(HUNG);
+ cret = regsubcomp(&preg, p, NiL, 0, 0);
+ alarm(0);
+ }
+ if (!cret && *(p += preg.re_npat) && !(preg.re_sub->re_flags & REG_SUB_LAST))
+ {
+ if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
+ continue;
+ cret = REG_EFLAGS;
+ }
+ }
+#endif
+#if _REG_decomp
+ if (!cret && (test & TEST_DECOMP))
+ {
+ char buf[128];
+
+ if ((j = nmatch) > sizeof(buf))
+ j = sizeof(buf);
+ fun = "regdecomp";
+ p = re + preg.re_npat;
+ if (!(test & TEST_CATCH))
+ i = regdecomp(&preg, -1, buf, j);
+ else if (!(cret = setjmp(state.gotcha)))
+ {
+ alarm(HUNG);
+ i = regdecomp(&preg, -1, buf, j);
+ alarm(0);
+ }
+ if (!cret)
+ {
+ catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
+ if (i > j)
+ {
+ if (i != (strlen(ans) + 1))
+ {
+ report("failed", fun, re, s, nstr, msg, flags, test);
+ printf(" %d byte buffer supplied, %d byte buffer required\n", j, i);
+ }
+ }
+ else if (strcmp(buf, ans))
+ {
+ report("failed", fun, re, s, nstr, msg, flags, test);
+ quote(ans, -1, test|TEST_DELIMIT);
+ printf(" expected, ");
+ quote(buf, -1, test|TEST_DELIMIT);
+ printf(" returned\n");
+ }
+ continue;
+ }
+ }
+#endif
+ if (!cret)
+ {
+ /* compile succeeded: derive the expected subexpression count from ans */
+ if (!(flags & REG_NOSUB) && nsub < 0 && *ans == '(')
+ {
+ for (p = ans; *p; p++)
+ if (*p == '(')
+ nsub++;
+ else if (*p == '{')
+ nsub--;
+ if (nsub >= 0)
+ {
+ if (test & TEST_IGNORE_OVER)
+ {
+ if (nmatch > nsub)
+ nmatch = nsub + 1;
+ }
+ else if (nsub != preg.re_nsub)
+ {
+ if (nsub > preg.re_nsub)
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, "OK", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+ else
+ {
+ report("re_nsub incorrect", fun, re, NiL, -1, msg, flags, test);
+ printf("at least %d expected, %d returned\n", nsub, preg.re_nsub);
+ state.errors++;
+ }
+ }
+ else
+ nsub = preg.re_nsub;
+ }
+ }
+ }
+ /* the answer names an error code but the compile succeeded */
+ if (!(test & (TEST_DECOMP|TEST_SUB)) && *ans && *ans != '(' && !streq(ans, "OK") && !streq(ans, "NOMATCH"))
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, "OK", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+ else if (!(test & TEST_LENIENT))
+ {
+ report("failed", fun, re, NiL, -1, msg, flags, test);
+ printf("%s expected, OK returned\n", ans);
+ }
+ catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
+ continue;
+ }
+ }
+ else
+ {
+ /* compile failed: compare the returned code with the expected one */
+ if (test & TEST_LENIENT)
+ /* we'll let it go this time */;
+ else if (!*ans || ans[0]=='(' || cret == REG_BADPAT && streq(ans, "NOMATCH"))
+ {
+ got = 0;
+ for (i = 1; i < elementsof(codes); i++)
+ if (cret==codes[i].code)
+ got = i;
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+ else
+ {
+ report("failed", fun, re, NiL, -1, msg, flags, test);
+ printf("%s returned: ", codes[got].name);
+ error(&preg, cret);
+ }
+ }
+ else
+ {
+ expected = got = 0;
+ for (i = 1; i < elementsof(codes); i++)
+ {
+ if (streq(ans, codes[i].name))
+ expected = i;
+ if (cret==codes[i].code)
+ got = i;
+ }
+ if (!expected)
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+ else
+ {
+ report("failed: invalid error code", NiL, re, NiL, -1, msg, flags, test);
+ printf("%s expected, %s returned\n", ans, codes[got].name);
+ }
+ }
+ else if (cret != codes[expected].code && cret != REG_BADPAT)
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+ else if (test & TEST_IGNORE_ERROR)
+ state.ignored++;
+ else
+ {
+ report("should fail and did", fun, re, NiL, -1, msg, flags, test);
+ printf("%s expected, %s returned: ", ans, codes[got].name);
+ state.errors--;
+ state.warnings++;
+ error(&preg, cret);
+ }
+ }
+ }
+ goto compile;
+ }
+
+#if _REG_nexec
+ execute:
+ if (nexec >= 0)
+ fun = "regnexec";
+ else
+#endif
+ fun = "regexec";
+
+ /* pre-fill the match array so unassigned slots can be recognized */
+ for (i = 0; i < elementsof(match); i++)
+ match[i] = state.NOMATCH;
+
+#if _REG_nexec
+ if (nexec >= 0)
+ {
+ eret = regnexec(&preg, s, nexec, nmatch, match, eflags);
+ s[nexec] = 0;
+ }
+ else
+#endif
+ {
+ if (!(test & TEST_CATCH))
+ eret = regexec(&preg, s, nmatch, match, eflags);
+ else if (!(eret = setjmp(state.gotcha)))
+ {
+ alarm(HUNG);
+ eret = regexec(&preg, s, nmatch, match, eflags);
+ alarm(0);
+ }
+ }
+#if _REG_subcomp
+ if ((test & TEST_SUB) && !eret)
+ {
+ fun = "regsubexec";
+ if (!(test & TEST_CATCH))
+ eret = regsubexec(&preg, s, nmatch, match);
+ else if (!(eret = setjmp(state.gotcha)))
+ {
+ alarm(HUNG);
+ eret = regsubexec(&preg, s, nmatch, match);
+ alarm(0);
+ }
+ }
+#endif
+ /* compare the execution result with the expected answer */
+ if (flags & REG_NOSUB)
+ {
+ if (eret)
+ {
+ if (eret != REG_NOMATCH || !streq(ans, "NOMATCH"))
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, "NOMATCH", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+ else
+ {
+ report("REG_NOSUB failed", fun, re, s, nstr, msg, flags, test);
+ error(&preg, eret);
+ }
+ }
+ }
+ else if (streq(ans, "NOMATCH"))
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
+ else
+ {
+ report("should fail and didn't", fun, re, s, nstr, msg, flags, test);
+ error(&preg, eret);
+ }
+ }
+ }
+ else if (eret)
+ {
+ if (eret != REG_NOMATCH || !streq(ans, "NOMATCH"))
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, "NOMATCH", NiL, 0, nsub, skip, level, test|TEST_DELIMIT);
+ else
+ {
+ report("failed", fun, re, s, nstr, msg, flags, test);
+ if (eret != REG_NOMATCH)
+ error(&preg, eret);
+ else if (*ans)
+ printf("expected: %s\n", ans);
+ else
+ printf("\n");
+ }
+ }
+ }
+ else if (streq(ans, "NOMATCH"))
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
+ else
+ {
+ report("should fail and didn't", fun, re, s, nstr, msg, flags, test);
+ matchprint(match, nmatch, nsub, NiL, test);
+ }
+ }
+#if _REG_subcomp
+ else if (test & TEST_SUB)
+ {
+ p = preg.re_sub->re_buf;
+ if (strcmp(p, ans))
+ {
+ report("failed", fun, re, s, nstr, msg, flags, test);
+ quote(ans, -1, test|TEST_DELIMIT);
+ printf(" expected, ");
+ quote(p, -1, test|TEST_DELIMIT);
+ printf(" returned\n");
+ }
+ }
+#endif
+ else if (!*ans)
+ {
+ if (match[0].rm_so != state.NOMATCH.rm_so)
+ {
+ if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
+ else
+ {
+ report("failed: no match but match array assigned", NiL, re, s, nstr, msg, flags, test);
+ matchprint(match, nmatch, nsub, NiL, test);
+ }
+ }
+ }
+ else if (matchcheck(match, nmatch, nsub, ans, re, s, nstr, flags, test))
+ {
+#if _REG_nexec
+ if (nexec < 0 && !nonexec)
+ {
+ /* repeat the match through regnexec() with an explicit length */
+ nexec = nstr >= 0 ? nstr : strlen(s);
+ s[nexec] = '\n';
+ testno++;
+ goto execute;
+ }
+#endif
+ if (!(test & (TEST_DECOMP|TEST_SUB|TEST_VERIFY)) && !nonosub)
+ {
+ /* repeat the whole test with REG_NOSUB added */
+ if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
+ continue;
+ flags |= REG_NOSUB;
+ goto nosub;
+ }
+ if (test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_OK);
+ }
+ else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+ skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
+ if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
+ continue;
+ goto compile;
+ }
+ /* per-file summary */
+ if (test & TEST_SUMMARY)
+ printf("tests=%-4d errors=%-4d warnings=%-2d ignored=%-2d unspecified=%-2d signals=%d\n", testno, state.errors, state.warnings, state.ignored, state.unspecified, state.signals);
+ else if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS)))
+ {
+ printf("TEST\t%s", unit);
+ if (subunit)
+ printf(" %-.*s", subunitlen, subunit);
+ printf(", %d test%s", testno, testno == 1 ? "" : "s");
+ if (state.ignored)
+ printf(", %d ignored mismatche%s", state.ignored, state.ignored == 1 ? "" : "s");
+ if (state.warnings)
+ printf(", %d warning%s", state.warnings, state.warnings == 1 ? "" : "s");
+ if (state.unspecified)
+ printf(", %d unspecified difference%s", state.unspecified, state.unspecified == 1 ? "" : "s");
+ if (state.signals)
+ printf(", %d signal%s", state.signals, state.signals == 1 ? "" : "s");
+ printf(", %d error%s\n", state.errors, state.errors == 1 ? "" : "s");
+ }
+ if (fp != stdin)
+ fclose(fp);
+ }
+ return 0;
+}