From a374d54b02eac896dbf836d4b2dc878bdf7ea755 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Thu, 6 Nov 2014 09:57:46 +1100 Subject: bufio: don't loop generating empty tokens The new rules for split functions mean that we are exposed to the common bug of a function that loops forever at EOF. Pick these off by shutting down the scanner if too many consecutive empty tokens are delivered. Fixes issue 9020. LGTM=rsc, adg R=golang-codereviews, rsc, adg, bradfitz CC=golang-codereviews https://codereview.appspot.com/169970043 --- src/bufio/scan.go | 12 +++++++++++ src/bufio/scan_test.go | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) (limited to 'src/bufio') diff --git a/src/bufio/scan.go b/src/bufio/scan.go index a41451524..73ad763b8 100644 --- a/src/bufio/scan.go +++ b/src/bufio/scan.go @@ -36,6 +36,7 @@ type Scanner struct { start int // First non-processed byte in buf. end int // End of data in buf. err error // Sticky error. + empties int // Count of successive empty tokens. } // SplitFunc is the signature of the split function used to tokenize the @@ -108,6 +109,8 @@ func (s *Scanner) Text() string { // After Scan returns false, the Err method will return any error that // occurred during scanning, except that if it was io.EOF, Err // will return nil. +// Split panics if the split function returns 100 empty tokens without +// advancing the input. This is a common error mode for scanners. func (s *Scanner) Scan() bool { // Loop until we have a token. for { @@ -125,6 +128,14 @@ func (s *Scanner) Scan() bool { } s.token = token if token != nil { + if len(token) > 0 { + s.empties = 0 + } else { + s.empties++ + if s.empties > 100 { + panic("bufio.Scan: 100 empty tokens without progressing") + } + } return true } } @@ -172,6 +183,7 @@ func (s *Scanner) Scan() bool { break } if n > 0 { + s.empties = 0 break } loop++ diff --git a/src/bufio/scan_test.go b/src/bufio/scan_test.go index 1454a8113..a1cf90ddb 100644 --- a/src/bufio/scan_test.go +++ b/src/bufio/scan_test.go @@ -455,3 +455,61 @@ func TestEmptyTokens(t *testing.T) { t.Fatal(err) } } + +func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) { + if len(data) > 0 { + return 1, data[:1], nil + } + return 0, data, nil +} + +func TestDontLoopForever(t *testing.T) { + s := NewScanner(strings.NewReader("abc")) + s.Split(loopAtEOFSplit) + // Expect a panic + panicked := true + defer func() { + err := recover() + if err == nil { + t.Fatal("should have panicked") + } + if msg, ok := err.(string); ok && strings.Contains(msg, "empty tokens") { + panicked = true + } else { + panic(err) + } + }() + for count := 0; s.Scan(); count++ { + if count > 1000 { + t.Fatal("looping") + } + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } +} + +type countdown int + +func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) { + if *c > 0 { + *c-- + return 1, data[:1], nil + } + return 0, nil, nil +} + +// Check that the looping-at-EOF check doesn't trigger for merely empty tokens. +func TestEmptyLinesOK(t *testing.T) { + c := countdown(10000) + s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000))) + s.Split(c.split) + for s.Scan() { + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } + if c != 0 { + t.Fatalf("stopped with %d left to process", c) + } +} -- cgit v1.2.1