diff options
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r-- | libgo/go/regexp/all_test.go | 124 | ||||
-rw-r--r-- | libgo/go/regexp/exec.go | 31 | ||||
-rw-r--r-- | libgo/go/regexp/exec_test.go | 6 | ||||
-rw-r--r-- | libgo/go/regexp/onepass.go | 5 | ||||
-rw-r--r-- | libgo/go/regexp/regexp.go | 56 |
5 files changed, 178 insertions, 44 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go index 88391ff47de..beb46e70995 100644 --- a/libgo/go/regexp/all_test.go +++ b/libgo/go/regexp/all_test.go @@ -11,7 +11,7 @@ import ( "testing" ) -var good_re = []string{ +var goodRe = []string{ ``, `.`, `^.$`, @@ -36,7 +36,7 @@ type stringError struct { err string } -var bad_re = []stringError{ +var badRe = []stringError{ {`*`, "missing argument to repetition operator: `*`"}, {`+`, "missing argument to repetition operator: `+`"}, {`?`, "missing argument to repetition operator: `?`"}, @@ -64,14 +64,14 @@ func compileTest(t *testing.T, expr string, error string) *Regexp { } func TestGoodCompile(t *testing.T) { - for i := 0; i < len(good_re); i++ { - compileTest(t, good_re[i], "") + for i := 0; i < len(goodRe); i++ { + compileTest(t, goodRe[i], "") } } func TestBadCompile(t *testing.T) { - for i := 0; i < len(bad_re); i++ { - compileTest(t, bad_re[i].re, bad_re[i].err) + for i := 0; i < len(badRe); i++ { + compileTest(t, badRe[i].re, badRe[i].err) } } @@ -512,6 +512,32 @@ func TestSplit(t *testing.T) { } } +// The following sequence of Match calls used to panic. See issue #12980. +func TestParseAndCompile(t *testing.T) { + expr := "a$" + s := "a\nb" + + for i, tc := range []struct { + reFlags syntax.Flags + expMatch bool + }{ + {syntax.Perl | syntax.OneLine, false}, + {syntax.Perl &^ syntax.OneLine, true}, + } { + parsed, err := syntax.Parse(expr, tc.reFlags) + if err != nil { + t.Fatalf("%d: parse: %v", i, err) + } + re, err := Compile(parsed.String()) + if err != nil { + t.Fatalf("%d: compile: %v", i, err) + } + if match := re.MatchString(s); match != tc.expMatch { + t.Errorf("%d: %q.MatchString(%q)=%t; expected=%t", i, re, s, match, tc.expMatch) + } + } +} + // Check that one-pass cutoff does trigger. func TestOnePassCutoff(t *testing.T) { re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl) @@ -538,6 +564,72 @@ func TestSwitchBacktrack(t *testing.T) { re.Match(long[:1]) // triggers backtracker } +func BenchmarkFind(b *testing.B) { + b.StopTimer() + re := MustCompile("a+b+") + wantSubs := "aaabb" + s := []byte("acbb" + wantSubs + "dd") + b.StartTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + subs := re.Find(s) + if string(subs) != wantSubs { + b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs) + } + } +} + +func BenchmarkFindString(b *testing.B) { + b.StopTimer() + re := MustCompile("a+b+") + wantSubs := "aaabb" + s := "acbb" + wantSubs + "dd" + b.StartTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + subs := re.FindString(s) + if subs != wantSubs { + b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs) + } + } +} + +func BenchmarkFindSubmatch(b *testing.B) { + b.StopTimer() + re := MustCompile("a(a+b+)b") + wantSubs := "aaabb" + s := []byte("acbb" + wantSubs + "dd") + b.StartTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + subs := re.FindSubmatch(s) + if string(subs[0]) != wantSubs { + b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs) + } + if string(subs[1]) != "aab" { + b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab") + } + } +} + +func BenchmarkFindStringSubmatch(b *testing.B) { + b.StopTimer() + re := MustCompile("a(a+b+)b") + wantSubs := "aaabb" + s := "acbb" + wantSubs + "dd" + b.StartTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + subs := re.FindStringSubmatch(s) + if subs[0] != wantSubs { + b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs) + } + if subs[1] != "aab" { + b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab") + } + } +} + func BenchmarkLiteral(b *testing.B) { x := strings.Repeat("x", 50) + "y" b.StopTimer() @@ -726,3 +818,23 @@ func BenchmarkMatchParallelCopied(b *testing.B) { } }) } + +var sink string + +func BenchmarkQuoteMetaAll(b *testing.B) { + s := string(specialBytes) + b.SetBytes(int64(len(s))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + sink = QuoteMeta(s) + } +} + +func BenchmarkQuoteMetaNone(b *testing.B) { + s := "abcdefghijklmnopqrstuvwxyz" + b.SetBytes(int64(len(s))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + sink = QuoteMeta(s) + } +} diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go index 4fd61b5d8d1..977619cb28a 100644 --- a/libgo/go/regexp/exec.go +++ b/libgo/go/regexp/exec.go @@ -405,14 +405,16 @@ func (m *machine) onepass(i input, pos int) bool { return m.matched } -// empty is a non-nil 0-element slice, -// so doExecute can avoid an allocation -// when 0 captures are requested from a successful match. -var empty = make([]int, 0) +// doMatch reports whether either r, b or s match the regexp. +func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool { + return re.doExecute(r, b, s, 0, 0, nil) != nil +} -// doExecute finds the leftmost match in the input and returns -// the position of its subexpressions. -func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int) []int { +// doExecute finds the leftmost match in the input, appends the position +// of its subexpressions to dstCap and returns dstCap. +// +// nil is returned if no matches are found and non-nil if matches are found. +func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int { m := re.get() var i input var size int @@ -445,12 +447,15 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i return nil } } - if ncap == 0 { - re.put(m) - return empty // empty but not nil + dstCap = append(dstCap, m.matchcap...) + if dstCap == nil { + // Keep the promise of returning non-nil value on match. + dstCap = arrayNoInts[:0] } - cap := make([]int, len(m.matchcap)) - copy(cap, m.matchcap) re.put(m) - return cap + return dstCap } + +// arrayNoInts is returned by doExecute match if nil dstCap is passed +// to it with ncap=0. +var arrayNoInts [0]int diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go index 69f187e38a8..766394de6ee 100644 --- a/libgo/go/regexp/exec_test.go +++ b/libgo/go/regexp/exec_test.go @@ -8,6 +8,7 @@ import ( "bufio" "compress/bzip2" "fmt" + "internal/testenv" "io" "os" "path/filepath" @@ -659,9 +660,14 @@ func makeText(n int) []byte { } func BenchmarkMatch(b *testing.B) { + isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race") + for _, data := range benchData { r := MustCompile(data.re) for _, size := range benchSizes { + if isRaceBuilder && size.n > 1<<10 { + continue + } t := makeText(size.n) b.Run(data.name+"/"+size.name, func(b *testing.B) { b.SetBytes(int64(size.n)) diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go index 49919548201..1b0564c3fd0 100644 --- a/libgo/go/regexp/onepass.go +++ b/libgo/go/regexp/onepass.go @@ -287,11 +287,6 @@ func (p runeSlice) Len() int { return len(p) } func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] } func (p runeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -// Sort is a convenience method. -func (p runeSlice) Sort() { - sort.Sort(p) -} - var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune} var anyRune = []rune{0, unicode.MaxRune} diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go index fe3db9f78b0..01093d4bd0d 100644 --- a/libgo/go/regexp/regexp.go +++ b/libgo/go/regexp/regexp.go @@ -408,17 +408,17 @@ func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { // MatchReader reports whether the Regexp matches the text read by the // RuneReader. func (re *Regexp) MatchReader(r io.RuneReader) bool { - return re.doExecute(r, nil, "", 0, 0) != nil + return re.doMatch(r, nil, "") } // MatchString reports whether the Regexp matches the string s. func (re *Regexp) MatchString(s string) bool { - return re.doExecute(nil, nil, s, 0, 0) != nil + return re.doMatch(nil, nil, s) } // Match reports whether the Regexp matches the byte slice b. func (re *Regexp) Match(b []byte) bool { - return re.doExecute(nil, b, "", 0, 0) != nil + return re.doMatch(nil, b, "") } // MatchReader checks whether a textual regular expression matches the text @@ -502,8 +502,9 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst nmatch = re.prog.NumCap } + var dstCap [2]int for searchPos <= endPos { - a := re.doExecute(nil, bsrc, src, searchPos, nmatch) + a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0]) if len(a) == 0 { break // no more matches } @@ -599,11 +600,22 @@ func special(b byte) bool { // inside the argument text; the returned string is a regular expression matching // the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. func QuoteMeta(s string) string { - b := make([]byte, 2*len(s)) - // A byte loop is correct because all metacharacters are ASCII. - j := 0 - for i := 0; i < len(s); i++ { + var i int + for i = 0; i < len(s); i++ { + if special(s[i]) { + break + } + } + // No meta characters found, so return original string. + if i >= len(s) { + return s + } + + b := make([]byte, 2*len(s)-i) + copy(b, s[:i]) + j := i + for ; i < len(s); i++ { if special(s[i]) { b[j] = '\\' j++ @@ -611,7 +623,7 @@ func QuoteMeta(s string) string { b[j] = s[i] j++ } - return string(b[0:j]) + return string(b[:j]) } // The number of capture values in the program may correspond @@ -641,7 +653,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) { } for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; { - matches := re.doExecute(nil, b, s, pos, re.prog.NumCap) + matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil) if len(matches) == 0 { break } @@ -681,7 +693,8 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) { // Find returns a slice holding the text of the leftmost match in b of the regular expression. // A return value of nil indicates no match. func (re *Regexp) Find(b []byte) []byte { - a := re.doExecute(nil, b, "", 0, 2) + var dstCap [2]int + a := re.doExecute(nil, b, "", 0, 2, dstCap[:0]) if a == nil { return nil } @@ -693,7 +706,7 @@ func (re *Regexp) Find(b []byte) []byte { // b[loc[0]:loc[1]]. // A return value of nil indicates no match. func (re *Regexp) FindIndex(b []byte) (loc []int) { - a := re.doExecute(nil, b, "", 0, 2) + a := re.doExecute(nil, b, "", 0, 2, nil) if a == nil { return nil } @@ -706,7 +719,8 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) { // an empty string. Use FindStringIndex or FindStringSubmatch if it is // necessary to distinguish these cases. func (re *Regexp) FindString(s string) string { - a := re.doExecute(nil, nil, s, 0, 2) + var dstCap [2]int + a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0]) if a == nil { return "" } @@ -718,7 +732,7 @@ func (re *Regexp) FindString(s string) string { // itself is at s[loc[0]:loc[1]]. // A return value of nil indicates no match. func (re *Regexp) FindStringIndex(s string) (loc []int) { - a := re.doExecute(nil, nil, s, 0, 2) + a := re.doExecute(nil, nil, s, 0, 2, nil) if a == nil { return nil } @@ -731,7 +745,7 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) { // byte offset loc[0] through loc[1]-1. // A return value of nil indicates no match. func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { - a := re.doExecute(r, nil, "", 0, 2) + a := re.doExecute(r, nil, "", 0, 2, nil) if a == nil { return nil } @@ -744,7 +758,8 @@ func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { // comment. // A return value of nil indicates no match. func (re *Regexp) FindSubmatch(b []byte) [][]byte { - a := re.doExecute(nil, b, "", 0, re.prog.NumCap) + var dstCap [4]int + a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0]) if a == nil { return nil } @@ -891,7 +906,7 @@ func extract(str string) (name string, num int, rest string, ok bool) { // in the package comment. // A return value of nil indicates no match. func (re *Regexp) FindSubmatchIndex(b []byte) []int { - return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap)) + return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil)) } // FindStringSubmatch returns a slice of strings holding the text of the @@ -900,7 +915,8 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int { // package comment. // A return value of nil indicates no match. func (re *Regexp) FindStringSubmatch(s string) []string { - a := re.doExecute(nil, nil, s, 0, re.prog.NumCap) + var dstCap [4]int + a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0]) if a == nil { return nil } @@ -919,7 +935,7 @@ func (re *Regexp) FindStringSubmatch(s string) []string { // 'Index' descriptions in the package comment. // A return value of nil indicates no match. func (re *Regexp) FindStringSubmatchIndex(s string) []int { - return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap)) + return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil)) } // FindReaderSubmatchIndex returns a slice holding the index pairs @@ -928,7 +944,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int { // by the 'Submatch' and 'Index' descriptions in the package comment. A // return value of nil indicates no match. func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { - return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap)) + return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil)) } const startSize = 10 // The size at which to start a slice in the 'All' routines. |