summaryrefslogtreecommitdiff
path: root/src/pkg/encoding/csv/reader.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/encoding/csv/reader.go')
-rw-r--r--src/pkg/encoding/csv/reader.go337
1 files changed, 0 insertions, 337 deletions
diff --git a/src/pkg/encoding/csv/reader.go b/src/pkg/encoding/csv/reader.go
deleted file mode 100644
index d9432954a..000000000
--- a/src/pkg/encoding/csv/reader.go
+++ /dev/null
@@ -1,337 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package csv reads and writes comma-separated values (CSV) files.
-//
-// A csv file contains zero or more records of one or more fields per record.
-// Each record is separated by the newline character. The final record may
-// optionally be followed by a newline character.
-//
-// field1,field2,field3
-//
-// White space is considered part of a field.
-//
-// Carriage returns before newline characters are silently removed.
-//
-// Blank lines are ignored. A line with only whitespace characters (excluding
-// the ending newline character) is not considered a blank line.
-//
-// Fields which start and stop with the quote character " are called
-// quoted-fields. The beginning and ending quote are not part of the
-// field.
-//
-// The source:
-//
-// normal string,"quoted-field"
-//
-// results in the fields
-//
-// {`normal string`, `quoted-field`}
-//
-// Within a quoted-field a quote character followed by a second quote
-// character is considered a single quote.
-//
-// "the ""word"" is true","a ""quoted-field"""
-//
-// results in
-//
-// {`the "word" is true`, `a "quoted-field"`}
-//
-// Newlines and commas may be included in a quoted-field
-//
-// "Multi-line
-// field","comma is ,"
-//
-// results in
-//
-// {`Multi-line
-// field`, `comma is ,`}
-package csv
-
-import (
- "bufio"
- "bytes"
- "errors"
- "fmt"
- "io"
- "unicode"
-)
-
-// A ParseError is returned for parsing errors.
-// The first line is 1. The first column is 0.
-type ParseError struct {
- Line int // Line where the error occurred
- Column int // Column (rune index) where the error occurred
- Err error // The actual error
-}
-
-func (e *ParseError) Error() string {
- return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
-}
-
-// These are the errors that can be returned in ParseError.Error
-var (
- ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used
- ErrBareQuote = errors.New("bare \" in non-quoted-field")
- ErrQuote = errors.New("extraneous \" in field")
- ErrFieldCount = errors.New("wrong number of fields in line")
-)
-
-// A Reader reads records from a CSV-encoded file.
-//
-// As returned by NewReader, a Reader expects input conforming to RFC 4180.
-// The exported fields can be changed to customize the details before the
-// first call to Read or ReadAll.
-//
-// Comma is the field delimiter. It defaults to ','.
-//
-// Comment, if not 0, is the comment character. Lines beginning with the
-// Comment character are ignored.
-//
-// If FieldsPerRecord is positive, Read requires each record to
-// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
-// the number of fields in the first record, so that future records must
-// have the same field count. If FieldsPerRecord is negative, no check is
-// made and records may have a variable number of fields.
-//
-// If LazyQuotes is true, a quote may appear in an unquoted field and a
-// non-doubled quote may appear in a quoted field.
-//
-// If TrimLeadingSpace is true, leading white space in a field is ignored.
-type Reader struct {
- Comma rune // field delimiter (set to ',' by NewReader)
- Comment rune // comment character for start of line
- FieldsPerRecord int // number of expected fields per record
- LazyQuotes bool // allow lazy quotes
- TrailingComma bool // ignored; here for backwards compatibility
- TrimLeadingSpace bool // trim leading space
- line int
- column int
- r *bufio.Reader
- field bytes.Buffer
-}
-
-// NewReader returns a new Reader that reads from r.
-func NewReader(r io.Reader) *Reader {
- return &Reader{
- Comma: ',',
- r: bufio.NewReader(r),
- }
-}
-
-// error creates a new ParseError based on err.
-func (r *Reader) error(err error) error {
- return &ParseError{
- Line: r.line,
- Column: r.column,
- Err: err,
- }
-}
-
-// Read reads one record from r. The record is a slice of strings with each
-// string representing one field.
-func (r *Reader) Read() (record []string, err error) {
- for {
- record, err = r.parseRecord()
- if record != nil {
- break
- }
- if err != nil {
- return nil, err
- }
- }
-
- if r.FieldsPerRecord > 0 {
- if len(record) != r.FieldsPerRecord {
- r.column = 0 // report at start of record
- return record, r.error(ErrFieldCount)
- }
- } else if r.FieldsPerRecord == 0 {
- r.FieldsPerRecord = len(record)
- }
- return record, nil
-}
-
-// ReadAll reads all the remaining records from r.
-// Each record is a slice of fields.
-// A successful call returns err == nil, not err == EOF. Because ReadAll is
-// defined to read until EOF, it does not treat end of file as an error to be
-// reported.
-func (r *Reader) ReadAll() (records [][]string, err error) {
- for {
- record, err := r.Read()
- if err == io.EOF {
- return records, nil
- }
- if err != nil {
- return nil, err
- }
- records = append(records, record)
- }
-}
-
-// readRune reads one rune from r, folding \r\n to \n and keeping track
-// of how far into the line we have read. r.column will point to the start
-// of this rune, not the end of this rune.
-func (r *Reader) readRune() (rune, error) {
- r1, _, err := r.r.ReadRune()
-
- // Handle \r\n here. We make the simplifying assumption that
- // anytime \r is followed by \n that it can be folded to \n.
- // We will not detect files which contain both \r\n and bare \n.
- if r1 == '\r' {
- r1, _, err = r.r.ReadRune()
- if err == nil {
- if r1 != '\n' {
- r.r.UnreadRune()
- r1 = '\r'
- }
- }
- }
- r.column++
- return r1, err
-}
-
-// skip reads runes up to and including the rune delim or until error.
-func (r *Reader) skip(delim rune) error {
- for {
- r1, err := r.readRune()
- if err != nil {
- return err
- }
- if r1 == delim {
- return nil
- }
- }
-}
-
-// parseRecord reads and parses a single csv record from r.
-func (r *Reader) parseRecord() (fields []string, err error) {
- // Each record starts on a new line. We increment our line
- // number (lines start at 1, not 0) and set column to -1
- // so as we increment in readRune it points to the character we read.
- r.line++
- r.column = -1
-
- // Peek at the first rune. If it is an error we are done.
- // If we are support comments and it is the comment character
- // then skip to the end of line.
-
- r1, _, err := r.r.ReadRune()
- if err != nil {
- return nil, err
- }
-
- if r.Comment != 0 && r1 == r.Comment {
- return nil, r.skip('\n')
- }
- r.r.UnreadRune()
-
- // At this point we have at least one field.
- for {
- haveField, delim, err := r.parseField()
- if haveField {
- fields = append(fields, r.field.String())
- }
- if delim == '\n' || err == io.EOF {
- return fields, err
- } else if err != nil {
- return nil, err
- }
- }
-}
-
-// parseField parses the next field in the record. The read field is
-// located in r.field. Delim is the first character not part of the field
-// (r.Comma or '\n').
-func (r *Reader) parseField() (haveField bool, delim rune, err error) {
- r.field.Reset()
-
- r1, err := r.readRune()
- for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) {
- r1, err = r.readRune()
- }
-
- if err == io.EOF && r.column != 0 {
- return true, 0, err
- }
- if err != nil {
- return false, 0, err
- }
-
- switch r1 {
- case r.Comma:
- // will check below
-
- case '\n':
- // We are a trailing empty field or a blank line
- if r.column == 0 {
- return false, r1, nil
- }
- return true, r1, nil
-
- case '"':
- // quoted field
- Quoted:
- for {
- r1, err = r.readRune()
- if err != nil {
- if err == io.EOF {
- if r.LazyQuotes {
- return true, 0, err
- }
- return false, 0, r.error(ErrQuote)
- }
- return false, 0, err
- }
- switch r1 {
- case '"':
- r1, err = r.readRune()
- if err != nil || r1 == r.Comma {
- break Quoted
- }
- if r1 == '\n' {
- return true, r1, nil
- }
- if r1 != '"' {
- if !r.LazyQuotes {
- r.column--
- return false, 0, r.error(ErrQuote)
- }
- // accept the bare quote
- r.field.WriteRune('"')
- }
- case '\n':
- r.line++
- r.column = -1
- }
- r.field.WriteRune(r1)
- }
-
- default:
- // unquoted field
- for {
- r.field.WriteRune(r1)
- r1, err = r.readRune()
- if err != nil || r1 == r.Comma {
- break
- }
- if r1 == '\n' {
- return true, r1, nil
- }
- if !r.LazyQuotes && r1 == '"' {
- return false, 0, r.error(ErrBareQuote)
- }
- }
- }
-
- if err != nil {
- if err == io.EOF {
- return true, 0, err
- }
- return false, 0, err
- }
-
- return true, r1, nil
-}