diff options
Diffstat (limited to 'libgo/go/bufio/scan_test.go')
-rw-r--r-- | libgo/go/bufio/scan_test.go | 134 |
1 files changed, 126 insertions, 8 deletions
diff --git a/libgo/go/bufio/scan_test.go b/libgo/go/bufio/scan_test.go index c1483b2685..eea87cbf7b 100644 --- a/libgo/go/bufio/scan_test.go +++ b/libgo/go/bufio/scan_test.go @@ -15,6 +15,8 @@ import ( "unicode/utf8" ) +const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + // Test white space table matches the Unicode definition. func TestSpace(t *testing.T) { for r := rune(0); r <= utf8.MaxRune; r++ { @@ -38,7 +40,7 @@ var scanTests = []string{ func TestScanByte(t *testing.T) { for n, test := range scanTests { - buf := bytes.NewBufferString(test) + buf := strings.NewReader(test) s := NewScanner(buf) s.Split(ScanBytes) var i int @@ -60,7 +62,7 @@ func TestScanByte(t *testing.T) { // Test that the rune splitter returns same sequence of runes (not bytes) as for range string. func TestScanRune(t *testing.T) { for n, test := range scanTests { - buf := bytes.NewBufferString(test) + buf := strings.NewReader(test) s := NewScanner(buf) s.Split(ScanRunes) var i, runeCount int @@ -104,7 +106,7 @@ var wordScanTests = []string{ // Test that the word splitter returns the same data as strings.Fields. func TestScanWords(t *testing.T) { for n, test := range wordScanTests { - buf := bytes.NewBufferString(test) + buf := strings.NewReader(test) s := NewScanner(buf) s.Split(ScanWords) words := strings.Fields(test) @@ -135,7 +137,7 @@ func TestScanWords(t *testing.T) { // reads in Scanner.Scan. type slowReader struct { max int - buf *bytes.Buffer + buf io.Reader } func (sr *slowReader) Read(p []byte) (n int, err error) { @@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) { // Test the line splitter, including some carriage returns but no long lines. func TestScanLongLines(t *testing.T) { - const smallMaxTokenSize = 256 // Much smaller for more efficient testing. // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. tmp := new(bytes.Buffer) buf := new(bytes.Buffer) @@ -248,7 +249,7 @@ func TestScanLineTooLong(t *testing.T) { // Test that the line splitter handles a final line without a newline. func testNoNewline(text string, lines []string, t *testing.T) { - buf := bytes.NewBufferString(text) + buf := strings.NewReader(text) s := NewScanner(&slowReader{7, buf}) s.Split(ScanLines) for lineNum := 0; s.Scan(); lineNum++ { @@ -277,7 +278,7 @@ func TestScanLineNoNewline(t *testing.T) { testNoNewline(text, lines, t) } -// Test that the line splitter handles a final line with a carriage return but nonewline. +// Test that the line splitter handles a final line with a carriage return but no newline. func TestScanLineReturnButNoNewline(t *testing.T) { const text = "abcdefghijklmn\nopqrstuvwxyz\r" lines := []string{ @@ -328,7 +329,7 @@ func TestSplitError(t *testing.T) { } // Read the data. const text = "abcdefghijklmnopqrstuvwxyz" - buf := bytes.NewBufferString(text) + buf := strings.NewReader(text) s := NewScanner(&slowReader{1, buf}) s.Split(errorSplit) var i int @@ -404,3 +405,120 @@ func TestBadReader(t *testing.T) { t.Errorf("unexpected error: %v", err) } } + +func TestScanWordsExcessiveWhiteSpace(t *testing.T) { + const word = "ipsum" + s := strings.Repeat(" ", 4*smallMaxTokenSize) + word + scanner := NewScanner(strings.NewReader(s)) + scanner.MaxTokenSize(smallMaxTokenSize) + scanner.Split(ScanWords) + if !scanner.Scan() { + t.Fatalf("scan failed: %v", scanner.Err()) + } + if token := scanner.Text(); token != word { + t.Fatalf("unexpected token: %v", token) + } +} + +// Test that empty tokens, including at end of line or end of file, are found by the scanner. +// Issue 8672: Could miss final empty token. + +func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error) { + for i := 0; i < len(data); i++ { + if data[i] == ',' { + return i + 1, data[:i], nil + } + } + if !atEOF { + return 0, nil, nil + } + return 0, data, nil +} + +func TestEmptyTokens(t *testing.T) { + s := NewScanner(strings.NewReader("1,2,3,")) + values := []string{"1", "2", "3", ""} + s.Split(commaSplit) + var i int + for i = 0; i < len(values); i++ { + if !s.Scan() { + break + } + if s.Text() != values[i] { + t.Errorf("%d: expected %q got %q", i, values[i], s.Text()) + } + } + if i != len(values) { + t.Errorf("got %d fields, expected %d", i, len(values)) + } + if err := s.Err(); err != nil { + t.Fatal(err) + } +} + +func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) { + if len(data) > 0 { + return 1, data[:1], nil + } + return 0, data, nil +} + +func TestDontLoopForever(t *testing.T) { + s := NewScanner(strings.NewReader("abc")) + s.Split(loopAtEOFSplit) + // Expect a panic + defer func() { + err := recover() + if err == nil { + t.Fatal("should have panicked") + } + if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") { + panic(err) + } + }() + for count := 0; s.Scan(); count++ { + if count > 1000 { + t.Fatal("looping") + } + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } +} + +func TestBlankLines(t *testing.T) { + s := NewScanner(strings.NewReader(strings.Repeat("\n", 1000))) + for count := 0; s.Scan(); count++ { + if count > 2000 { + t.Fatal("looping") + } + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } +} + +type countdown int + +func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) { + if *c > 0 { + *c-- + return 1, data[:1], nil + } + return 0, nil, nil +} + +// Check that the looping-at-EOF check doesn't trigger for merely empty tokens. +func TestEmptyLinesOK(t *testing.T) { + c := countdown(10000) + s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000))) + s.Split(c.split) + for s.Scan() { + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } + if c != 0 { + t.Fatalf("stopped with %d left to process", c) + } +} |