summaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go124
-rw-r--r--libgo/go/regexp/exec.go31
-rw-r--r--libgo/go/regexp/exec_test.go6
-rw-r--r--libgo/go/regexp/onepass.go5
-rw-r--r--libgo/go/regexp/regexp.go56
5 files changed, 178 insertions, 44 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index 88391ff47de..beb46e70995 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -11,7 +11,7 @@ import (
"testing"
)
-var good_re = []string{
+var goodRe = []string{
``,
`.`,
`^.$`,
@@ -36,7 +36,7 @@ type stringError struct {
err string
}
-var bad_re = []stringError{
+var badRe = []stringError{
{`*`, "missing argument to repetition operator: `*`"},
{`+`, "missing argument to repetition operator: `+`"},
{`?`, "missing argument to repetition operator: `?`"},
@@ -64,14 +64,14 @@ func compileTest(t *testing.T, expr string, error string) *Regexp {
}
func TestGoodCompile(t *testing.T) {
- for i := 0; i < len(good_re); i++ {
- compileTest(t, good_re[i], "")
+ for i := 0; i < len(goodRe); i++ {
+ compileTest(t, goodRe[i], "")
}
}
func TestBadCompile(t *testing.T) {
- for i := 0; i < len(bad_re); i++ {
- compileTest(t, bad_re[i].re, bad_re[i].err)
+ for i := 0; i < len(badRe); i++ {
+ compileTest(t, badRe[i].re, badRe[i].err)
}
}
@@ -512,6 +512,32 @@ func TestSplit(t *testing.T) {
}
}
+// The following sequence of Match calls used to panic. See issue #12980.
+func TestParseAndCompile(t *testing.T) {
+ expr := "a$"
+ s := "a\nb"
+
+ for i, tc := range []struct {
+ reFlags syntax.Flags
+ expMatch bool
+ }{
+ {syntax.Perl | syntax.OneLine, false},
+ {syntax.Perl &^ syntax.OneLine, true},
+ } {
+ parsed, err := syntax.Parse(expr, tc.reFlags)
+ if err != nil {
+ t.Fatalf("%d: parse: %v", i, err)
+ }
+ re, err := Compile(parsed.String())
+ if err != nil {
+ t.Fatalf("%d: compile: %v", i, err)
+ }
+ if match := re.MatchString(s); match != tc.expMatch {
+ t.Errorf("%d: %q.MatchString(%q)=%t; expected=%t", i, re, s, match, tc.expMatch)
+ }
+ }
+}
+
// Check that one-pass cutoff does trigger.
func TestOnePassCutoff(t *testing.T) {
re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl)
@@ -538,6 +564,72 @@ func TestSwitchBacktrack(t *testing.T) {
re.Match(long[:1]) // triggers backtracker
}
+func BenchmarkFind(b *testing.B) {
+ b.StopTimer()
+ re := MustCompile("a+b+")
+ wantSubs := "aaabb"
+ s := []byte("acbb" + wantSubs + "dd")
+ b.StartTimer()
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ subs := re.Find(s)
+ if string(subs) != wantSubs {
+ b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs)
+ }
+ }
+}
+
+func BenchmarkFindString(b *testing.B) {
+ b.StopTimer()
+ re := MustCompile("a+b+")
+ wantSubs := "aaabb"
+ s := "acbb" + wantSubs + "dd"
+ b.StartTimer()
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ subs := re.FindString(s)
+ if subs != wantSubs {
+ b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs)
+ }
+ }
+}
+
+func BenchmarkFindSubmatch(b *testing.B) {
+ b.StopTimer()
+ re := MustCompile("a(a+b+)b")
+ wantSubs := "aaabb"
+ s := []byte("acbb" + wantSubs + "dd")
+ b.StartTimer()
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ subs := re.FindSubmatch(s)
+ if string(subs[0]) != wantSubs {
+ b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
+ }
+ if string(subs[1]) != "aab" {
+ b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
+ }
+ }
+}
+
+func BenchmarkFindStringSubmatch(b *testing.B) {
+ b.StopTimer()
+ re := MustCompile("a(a+b+)b")
+ wantSubs := "aaabb"
+ s := "acbb" + wantSubs + "dd"
+ b.StartTimer()
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ subs := re.FindStringSubmatch(s)
+ if subs[0] != wantSubs {
+ b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
+ }
+ if subs[1] != "aab" {
+ b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
+ }
+ }
+}
+
func BenchmarkLiteral(b *testing.B) {
x := strings.Repeat("x", 50) + "y"
b.StopTimer()
@@ -726,3 +818,23 @@ func BenchmarkMatchParallelCopied(b *testing.B) {
}
})
}
+
+var sink string
+
+func BenchmarkQuoteMetaAll(b *testing.B) {
+ s := string(specialBytes)
+ b.SetBytes(int64(len(s)))
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ sink = QuoteMeta(s)
+ }
+}
+
+func BenchmarkQuoteMetaNone(b *testing.B) {
+ s := "abcdefghijklmnopqrstuvwxyz"
+ b.SetBytes(int64(len(s)))
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ sink = QuoteMeta(s)
+ }
+}
diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go
index 4fd61b5d8d1..977619cb28a 100644
--- a/libgo/go/regexp/exec.go
+++ b/libgo/go/regexp/exec.go
@@ -405,14 +405,16 @@ func (m *machine) onepass(i input, pos int) bool {
return m.matched
}
-// empty is a non-nil 0-element slice,
-// so doExecute can avoid an allocation
-// when 0 captures are requested from a successful match.
-var empty = make([]int, 0)
+// doMatch reports whether either r, b or s match the regexp.
+func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
+ return re.doExecute(r, b, s, 0, 0, nil) != nil
+}
-// doExecute finds the leftmost match in the input and returns
-// the position of its subexpressions.
-func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int) []int {
+// doExecute finds the leftmost match in the input, appends the position
+// of its subexpressions to dstCap and returns dstCap.
+//
+// nil is returned if no matches are found and non-nil if matches are found.
+func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
m := re.get()
var i input
var size int
@@ -445,12 +447,15 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i
return nil
}
}
- if ncap == 0 {
- re.put(m)
- return empty // empty but not nil
+ dstCap = append(dstCap, m.matchcap...)
+ if dstCap == nil {
+ // Keep the promise of returning non-nil value on match.
+ dstCap = arrayNoInts[:0]
}
- cap := make([]int, len(m.matchcap))
- copy(cap, m.matchcap)
re.put(m)
- return cap
+ return dstCap
}
+
+// arrayNoInts is returned by doExecute match if nil dstCap is passed
+// to it with ncap=0.
+var arrayNoInts [0]int
diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go
index 69f187e38a8..766394de6ee 100644
--- a/libgo/go/regexp/exec_test.go
+++ b/libgo/go/regexp/exec_test.go
@@ -8,6 +8,7 @@ import (
"bufio"
"compress/bzip2"
"fmt"
+ "internal/testenv"
"io"
"os"
"path/filepath"
@@ -659,9 +660,14 @@ func makeText(n int) []byte {
}
func BenchmarkMatch(b *testing.B) {
+ isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
+
for _, data := range benchData {
r := MustCompile(data.re)
for _, size := range benchSizes {
+ if isRaceBuilder && size.n > 1<<10 {
+ continue
+ }
t := makeText(size.n)
b.Run(data.name+"/"+size.name, func(b *testing.B) {
b.SetBytes(int64(size.n))
diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go
index 49919548201..1b0564c3fd0 100644
--- a/libgo/go/regexp/onepass.go
+++ b/libgo/go/regexp/onepass.go
@@ -287,11 +287,6 @@ func (p runeSlice) Len() int { return len(p) }
func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] }
func (p runeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
-// Sort is a convenience method.
-func (p runeSlice) Sort() {
- sort.Sort(p)
-}
-
var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
var anyRune = []rune{0, unicode.MaxRune}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index fe3db9f78b0..01093d4bd0d 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -408,17 +408,17 @@ func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
// MatchReader reports whether the Regexp matches the text read by the
// RuneReader.
func (re *Regexp) MatchReader(r io.RuneReader) bool {
- return re.doExecute(r, nil, "", 0, 0) != nil
+ return re.doMatch(r, nil, "")
}
// MatchString reports whether the Regexp matches the string s.
func (re *Regexp) MatchString(s string) bool {
- return re.doExecute(nil, nil, s, 0, 0) != nil
+ return re.doMatch(nil, nil, s)
}
// Match reports whether the Regexp matches the byte slice b.
func (re *Regexp) Match(b []byte) bool {
- return re.doExecute(nil, b, "", 0, 0) != nil
+ return re.doMatch(nil, b, "")
}
// MatchReader checks whether a textual regular expression matches the text
@@ -502,8 +502,9 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst
nmatch = re.prog.NumCap
}
+ var dstCap [2]int
for searchPos <= endPos {
- a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
+ a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0])
if len(a) == 0 {
break // no more matches
}
@@ -599,11 +600,22 @@ func special(b byte) bool {
// inside the argument text; the returned string is a regular expression matching
// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
func QuoteMeta(s string) string {
- b := make([]byte, 2*len(s))
-
// A byte loop is correct because all metacharacters are ASCII.
- j := 0
- for i := 0; i < len(s); i++ {
+ var i int
+ for i = 0; i < len(s); i++ {
+ if special(s[i]) {
+ break
+ }
+ }
+ // No meta characters found, so return original string.
+ if i >= len(s) {
+ return s
+ }
+
+ b := make([]byte, 2*len(s)-i)
+ copy(b, s[:i])
+ j := i
+ for ; i < len(s); i++ {
if special(s[i]) {
b[j] = '\\'
j++
@@ -611,7 +623,7 @@ func QuoteMeta(s string) string {
b[j] = s[i]
j++
}
- return string(b[0:j])
+ return string(b[:j])
}
// The number of capture values in the program may correspond
@@ -641,7 +653,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
}
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
- matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
+ matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil)
if len(matches) == 0 {
break
}
@@ -681,7 +693,8 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
// A return value of nil indicates no match.
func (re *Regexp) Find(b []byte) []byte {
- a := re.doExecute(nil, b, "", 0, 2)
+ var dstCap [2]int
+ a := re.doExecute(nil, b, "", 0, 2, dstCap[:0])
if a == nil {
return nil
}
@@ -693,7 +706,7 @@ func (re *Regexp) Find(b []byte) []byte {
// b[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindIndex(b []byte) (loc []int) {
- a := re.doExecute(nil, b, "", 0, 2)
+ a := re.doExecute(nil, b, "", 0, 2, nil)
if a == nil {
return nil
}
@@ -706,7 +719,8 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) {
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
// necessary to distinguish these cases.
func (re *Regexp) FindString(s string) string {
- a := re.doExecute(nil, nil, s, 0, 2)
+ var dstCap [2]int
+ a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0])
if a == nil {
return ""
}
@@ -718,7 +732,7 @@ func (re *Regexp) FindString(s string) string {
// itself is at s[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindStringIndex(s string) (loc []int) {
- a := re.doExecute(nil, nil, s, 0, 2)
+ a := re.doExecute(nil, nil, s, 0, 2, nil)
if a == nil {
return nil
}
@@ -731,7 +745,7 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) {
// byte offset loc[0] through loc[1]-1.
// A return value of nil indicates no match.
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
- a := re.doExecute(r, nil, "", 0, 2)
+ a := re.doExecute(r, nil, "", 0, 2, nil)
if a == nil {
return nil
}
@@ -744,7 +758,8 @@ func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
// comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
- a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
+ var dstCap [4]int
+ a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0])
if a == nil {
return nil
}
@@ -891,7 +906,7 @@ func extract(str string) (name string, num int, rest string, ok bool) {
// in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
- return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
+ return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil))
}
// FindStringSubmatch returns a slice of strings holding the text of the
@@ -900,7 +915,8 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int {
// package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatch(s string) []string {
- a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
+ var dstCap [4]int
+ a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0])
if a == nil {
return nil
}
@@ -919,7 +935,7 @@ func (re *Regexp) FindStringSubmatch(s string) []string {
// 'Index' descriptions in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
- return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
+ return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil))
}
// FindReaderSubmatchIndex returns a slice holding the index pairs
@@ -928,7 +944,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int {
// by the 'Submatch' and 'Index' descriptions in the package comment. A
// return value of nil indicates no match.
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
- return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
+ return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil))
}
const startSize = 10 // The size at which to start a slice in the 'All' routines.