summaryrefslogtreecommitdiff
path: root/libgo/go/encoding/csv
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/encoding/csv')
-rw-r--r--libgo/go/encoding/csv/reader.go69
-rw-r--r--libgo/go/encoding/csv/reader_test.go31
2 files changed, 80 insertions, 20 deletions
diff --git a/libgo/go/encoding/csv/reader.go b/libgo/go/encoding/csv/reader.go
index c8c4ca77583..a3497c84f96 100644
--- a/libgo/go/encoding/csv/reader.go
+++ b/libgo/go/encoding/csv/reader.go
@@ -110,6 +110,10 @@ type Reader struct {
// If TrimLeadingSpace is true, leading white space in a field is ignored.
// This is done even if the field delimiter, Comma, is white space.
TrimLeadingSpace bool
+ // ReuseRecord controls whether calls to Read may return a slice sharing
+ // the backing array of the previous call's returned slice for performance.
+ // By default, each call to Read returns newly allocated memory owned by the caller.
+ ReuseRecord bool
line int
column int
@@ -122,6 +126,9 @@ type Reader struct {
// Indexes of fields inside lineBuffer
// The i'th field starts at offset fieldIndexes[i] in lineBuffer.
fieldIndexes []int
+
+ // only used when ReuseRecord == true
+ lastRecord []string
}
// NewReader returns a new Reader that reads from r.
@@ -147,26 +154,17 @@ func (r *Reader) error(err error) error {
// Except for that case, Read always returns either a non-nil
// record or a non-nil error, but not both.
// If there is no data left to be read, Read returns nil, io.EOF.
+// If ReuseRecord is true, the returned slice may be shared
+// between multiple calls to Read.
func (r *Reader) Read() (record []string, err error) {
- for {
- record, err = r.parseRecord()
- if record != nil {
- break
- }
- if err != nil {
- return nil, err
- }
+ if r.ReuseRecord {
+ record, err = r.readRecord(r.lastRecord)
+ r.lastRecord = record
+ } else {
+ record, err = r.readRecord(nil)
}
- if r.FieldsPerRecord > 0 {
- if len(record) != r.FieldsPerRecord {
- r.column = 0 // report at start of record
- return record, r.error(ErrFieldCount)
- }
- } else if r.FieldsPerRecord == 0 {
- r.FieldsPerRecord = len(record)
- }
- return record, nil
+ return record, err
}
// ReadAll reads all the remaining records from r.
@@ -176,7 +174,7 @@ func (r *Reader) Read() (record []string, err error) {
// reported.
func (r *Reader) ReadAll() (records [][]string, err error) {
for {
- record, err := r.Read()
+ record, err := r.readRecord(nil)
if err == io.EOF {
return records, nil
}
@@ -187,6 +185,31 @@ func (r *Reader) ReadAll() (records [][]string, err error) {
}
}
+// readRecord reads and parses a single csv record from r.
+// Unlike parseRecord, readRecord handles FieldsPerRecord.
+// If dst has enough capacity it will be used for the returned record.
+func (r *Reader) readRecord(dst []string) (record []string, err error) {
+ for {
+ record, err = r.parseRecord(dst)
+ if record != nil {
+ break
+ }
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if r.FieldsPerRecord > 0 {
+ if len(record) != r.FieldsPerRecord {
+ r.column = 0 // report at start of record
+ return record, r.error(ErrFieldCount)
+ }
+ } else if r.FieldsPerRecord == 0 {
+ r.FieldsPerRecord = len(record)
+ }
+ return record, nil
+}
+
// readRune reads one rune from r, folding \r\n to \n and keeping track
// of how far into the line we have read. r.column will point to the start
// of this rune, not the end of this rune.
@@ -223,7 +246,8 @@ func (r *Reader) skip(delim rune) error {
}
// parseRecord reads and parses a single csv record from r.
-func (r *Reader) parseRecord() (fields []string, err error) {
+// If dst has enough capacity it will be used for the returned fields.
+func (r *Reader) parseRecord(dst []string) (fields []string, err error) {
// Each record starts on a new line. We increment our line
// number (lines start at 1, not 0) and set column to -1
// so as we increment in readRune it points to the character we read.
@@ -275,7 +299,12 @@ func (r *Reader) parseRecord() (fields []string, err error) {
// minimal and a tradeoff for better performance through the combined
// allocations.
line := r.lineBuffer.String()
- fields = make([]string, fieldCount)
+
+ if cap(dst) >= fieldCount {
+ fields = dst[:fieldCount]
+ } else {
+ fields = make([]string, fieldCount)
+ }
for i, idx := range r.fieldIndexes {
if i == fieldCount-1 {
diff --git a/libgo/go/encoding/csv/reader_test.go b/libgo/go/encoding/csv/reader_test.go
index 7b3aca4c5f3..5ab1b61256c 100644
--- a/libgo/go/encoding/csv/reader_test.go
+++ b/libgo/go/encoding/csv/reader_test.go
@@ -24,6 +24,7 @@ var readTests = []struct {
LazyQuotes bool
TrailingComma bool
TrimLeadingSpace bool
+ ReuseRecord bool
Error string
Line int // Expected error line if != 0
@@ -260,6 +261,15 @@ x,,,
{"c", "d", "e"},
},
},
+ {
+ Name: "ReadAllReuseRecord",
+ ReuseRecord: true,
+ Input: "a,b\nc,d",
+ Output: [][]string{
+ {"a", "b"},
+ {"c", "d"},
+ },
+ },
}
func TestRead(t *testing.T) {
@@ -274,6 +284,7 @@ func TestRead(t *testing.T) {
r.LazyQuotes = tt.LazyQuotes
r.TrailingComma = tt.TrailingComma
r.TrimLeadingSpace = tt.TrimLeadingSpace
+ r.ReuseRecord = tt.ReuseRecord
if tt.Comma != 0 {
r.Comma = tt.Comma
}
@@ -369,3 +380,23 @@ xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzz
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}
+
+func BenchmarkReadReuseRecord(b *testing.B) {
+ benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
+}
+
+func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
+ benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
+}
+
+func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
+ benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
+}
+
+func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
+ benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
+,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+`, 3))
+}