summaryrefslogtreecommitdiff
path: root/src/regexp
diff options
context:
space:
mode:
authorRuss Cox <rsc@golang.org>2014-09-30 12:08:09 -0400
committerRuss Cox <rsc@golang.org>2014-09-30 12:08:09 -0400
commit862ae4767fc1a27431b397cfd260b68aec97eda0 (patch)
treefdc5fa6da57f73e85f649b325e09438e14d9c475 /src/regexp
parent5b549474bf6a022c85a2d78f5c07b519d6f7baf1 (diff)
downloadgo-862ae4767fc1a27431b397cfd260b68aec97eda0.tar.gz
regexp/syntax: reject large repetitions created by nesting small ones
Fixes issue 7609. LGTM=r R=r CC=golang-codereviews https://codereview.appspot.com/150270043
Diffstat (limited to 'src/regexp')
-rw-r--r--src/regexp/syntax/parse.go34
-rw-r--r--src/regexp/syntax/parse_test.go13
2 files changed, 47 insertions, 0 deletions
diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go
index 08f8d307a..3dc8ccf50 100644
--- a/src/regexp/syntax/parse.go
+++ b/src/regexp/syntax/parse.go
@@ -244,6 +244,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
if sub.Op >= opPseudo {
return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
}
+
re := p.newRegexp(op)
re.Min = min
re.Max = max
@@ -251,9 +252,42 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
re.Sub = re.Sub0[:1]
re.Sub[0] = sub
p.stack[n-1] = re
+
+ if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
+ return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
+ }
+
return after, nil
}
+// repeatIsValid reports whether the repetition re is valid.
+// Valid means that the combination of the top-level repetition
+// and any inner repetitions does not exceed n copies of the
+// innermost thing.
+// This function rewalks the regexp tree and is called for every repetition,
+// so we have to worry about inducing quadratic behavior in the parser.
+// We avoid this by only calling repeatIsValid when min or max >= 2.
+// In that case the depth of any >= 2 nesting can only get to 9 without
+// triggering a parse error, so each subtree can only be rewalked 9 times.
+func repeatIsValid(re *Regexp, n int) bool {
+ if re.Op == OpRepeat {
+ m := re.Max
+ if m < 0 {
+ m = re.Min
+ }
+ if m > n {
+ return false
+ }
+ n /= m
+ }
+ for _, sub := range re.Sub {
+ if !repeatIsValid(sub, n) {
+ return false
+ }
+ }
+ return true
+}
+
// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
func (p *parser) concat() *Regexp {
p.maybeConcat(-1, 0)
diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go
index f3089294c..c4a1117ff 100644
--- a/src/regexp/syntax/parse_test.go
+++ b/src/regexp/syntax/parse_test.go
@@ -200,6 +200,10 @@ var parseTests = []parseTest{
`cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
{`x{2}y|x{2}[0-9]y`,
`cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
+
+ // Valid repetitions.
+ {`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
+ {`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
}
const testFlags = MatchNL | PerlX | UnicodeGroups
@@ -262,6 +266,10 @@ func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue
}
+ if tt.Dump == "" {
+ // It parsed. That's all we care about.
+ continue
+ }
d := dump(re)
if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
@@ -470,6 +478,7 @@ var invalidRegexps = []string{
`(?i)[a-Z]`,
`a{100000}`,
`a{100000,}`,
+ "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
}
var onlyPerl = []string{
@@ -527,6 +536,10 @@ func TestToStringEquivalentParse(t *testing.T) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue
}
+ if tt.Dump == "" {
+ // It parsed. That's all we care about.
+ continue
+ }
d := dump(re)
if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)