diff options
Diffstat (limited to 'libgo/go/mime/multipart/multipart.go')
-rw-r--r-- | libgo/go/mime/multipart/multipart.go | 219 |
1 files changed, 108 insertions, 111 deletions
diff --git a/libgo/go/mime/multipart/multipart.go b/libgo/go/mime/multipart/multipart.go index 205348ca89d..19548081769 100644 --- a/libgo/go/mime/multipart/multipart.go +++ b/libgo/go/mime/multipart/multipart.go @@ -42,9 +42,7 @@ type Part struct { // during Read calls. Header textproto.MIMEHeader - buffer *bytes.Buffer - mr *Reader - bytesRead int + mr *Reader disposition string dispositionParams map[string]string @@ -53,6 +51,11 @@ type Part struct { // wrapper around such a reader, decoding the // Content-Transfer-Encoding r io.Reader + + n int // known data bytes waiting in mr.bufReader + total int64 // total data bytes read already + err error // error to return when n == 0 + readErr error // read error observed from mr.bufReader } // FormName returns the name parameter if p has a Content-Disposition @@ -126,7 +129,6 @@ func newPart(mr *Reader) (*Part, error) { bp := &Part{ Header: make(map[string][]string), mr: mr, - buffer: new(bytes.Buffer), } if err := bp.populateHeaders(); err != nil { return nil, err @@ -161,65 +163,118 @@ type partReader struct { p *Part } -func (pr partReader) Read(d []byte) (n int, err error) { +func (pr partReader) Read(d []byte) (int, error) { p := pr.p - defer func() { - p.bytesRead += n - }() - if p.buffer.Len() >= len(d) { - // Internal buffer of unconsumed data is large enough for - // the read request. No need to parse more at the moment. - return p.buffer.Read(d) + br := p.mr.bufReader + + // Read into buffer until we identify some data to return, + // or we find a reason to stop (boundary or read error). + for p.n == 0 && p.err == nil { + peek, _ := br.Peek(br.Buffered()) + p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) + if p.n == 0 && p.err == nil { + // Force buffered I/O to read more into buffer. + _, p.readErr = br.Peek(len(peek) + 1) + if p.readErr == io.EOF { + p.readErr = io.ErrUnexpectedEOF + } + } } - peek, err := p.mr.bufReader.Peek(peekBufferSize) // TODO(bradfitz): add buffer size accessor - - // Look for an immediate empty part without a leading \r\n - // before the boundary separator. Some MIME code makes empty - // parts like this. Most browsers, however, write the \r\n - // before the subsequent boundary even for empty parts and - // won't hit this path. - if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { - return 0, io.EOF + + // Read out from "data to return" part of buffer. + if p.n == 0 { + return 0, p.err } - unexpectedEOF := err == io.EOF - if err != nil && !unexpectedEOF { - return 0, fmt.Errorf("multipart: Part Read: %v", err) + n := len(d) + if n > p.n { + n = p.n } - if peek == nil { - panic("nil peek buf") + n, _ = br.Read(d[:n]) + p.total += int64(n) + p.n -= n + if p.n == 0 { + return n, p.err } - // Search the peek buffer for "\r\n--boundary". If found, - // consume everything up to the boundary. If not, consume only - // as much of the peek buffer as cannot hold the boundary - // string. - nCopy := 0 - foundBoundary := false - if idx, isEnd := p.mr.peekBufferSeparatorIndex(peek); idx != -1 { - nCopy = idx - foundBoundary = isEnd - if !isEnd && nCopy == 0 { - nCopy = 1 // make some progress. + return n, nil +} + +// scanUntilBoundary scans buf to identify how much of it can be safely +// returned as part of the Part body. +// dashBoundary is "--boundary". +// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. +// The comments below (and the name) assume "\n--boundary", but either is accepted. +// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. +// readErr is the read error, if any, that followed reading the bytes in buf. +// scanUntilBoundary returns the number of data bytes from buf that can be +// returned as part of the Part body and also the error to return (if any) +// once those data bytes are done. +func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { + if total == 0 { + // At beginning of body, allow dashBoundary. + if bytes.HasPrefix(buf, dashBoundary) { + switch matchAfterPrefix(buf, dashBoundary, readErr) { + case -1: + return len(dashBoundary), nil + case 0: + return 0, nil + case +1: + return 0, io.EOF + } + } + if bytes.HasPrefix(dashBoundary, buf) { + return 0, readErr } - } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { - nCopy = safeCount - } else if unexpectedEOF { - // If we've run out of peek buffer and the boundary - // wasn't found (and can't possibly fit), we must have - // hit the end of the file unexpectedly. - return 0, io.ErrUnexpectedEOF } - if nCopy > 0 { - if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil { - return 0, err + + // Search for "\n--boundary". + if i := bytes.Index(buf, nlDashBoundary); i >= 0 { + switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { + case -1: + return i + len(nlDashBoundary), nil + case 0: + return i, nil + case +1: + return i, io.EOF + } + } + if bytes.HasPrefix(nlDashBoundary, buf) { + return 0, readErr + } + + // Otherwise, anything up to the final \n is not part of the boundary + // and so must be part of the body. + // Also if the section from the final \n onward is not a prefix of the boundary, + // it too must be part of the body. + i := bytes.LastIndexByte(buf, nlDashBoundary[0]) + if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { + return i, nil + } + return len(buf), readErr +} + +// matchAfterPrefix checks whether buf should be considered to match the boundary. +// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary", +// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true. +// +// matchAfterPrefix returns +1 if the buffer does match the boundary, +// meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input. +// It returns -1 if the buffer definitely does NOT match the boundary, +// meaning the prefix is followed by some other character. +// For example, "--foobar" does not match "--foo". +// It returns 0 more input needs to be read to make the decision, +// meaning that len(buf) == len(prefix) and readErr == nil. +func matchAfterPrefix(buf, prefix []byte, readErr error) int { + if len(buf) == len(prefix) { + if readErr != nil { + return +1 } + return 0 } - n, err = p.buffer.Read(d) - if err == io.EOF && !foundBoundary { - // If the boundary hasn't been reached there's more to - // read, so don't pass through an EOF from the buffer - err = nil + c := buf[len(prefix)] + if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '-' { + return +1 } - return + return -1 } func (p *Part) Close() error { @@ -337,64 +392,6 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { return bytes.Equal(rest, mr.nl) } -// peekBufferIsEmptyPart reports whether the provided peek-ahead -// buffer represents an empty part. It is called only if we've not -// already read any bytes in this part and checks for the case of MIME -// software not writing the \r\n on empty parts. Some does, some -// doesn't. -// -// This checks that what follows the "--boundary" is actually the end -// ("--boundary--" with optional whitespace) or optional whitespace -// and then a newline, so we don't catch "--boundaryFAKE", in which -// case the whole line is part of the data. -func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool { - // End of parts case. - // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)` - if bytes.HasPrefix(peek, mr.dashBoundaryDash) { - rest := peek[len(mr.dashBoundaryDash):] - rest = skipLWSPChar(rest) - return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0 - } - if !bytes.HasPrefix(peek, mr.dashBoundary) { - return false - } - // Test whether rest matches `^[ \t]*\r\n`) - rest := peek[len(mr.dashBoundary):] - rest = skipLWSPChar(rest) - return bytes.HasPrefix(rest, mr.nl) -} - -// peekBufferSeparatorIndex returns the index of mr.nlDashBoundary in -// peek and whether it is a real boundary (and not a prefix of an -// unrelated separator). To be the end, the peek buffer must contain a -// newline after the boundary or contain the ending boundary (--separator--). -func (mr *Reader) peekBufferSeparatorIndex(peek []byte) (idx int, isEnd bool) { - idx = bytes.Index(peek, mr.nlDashBoundary) - if idx == -1 { - return - } - - peek = peek[idx+len(mr.nlDashBoundary):] - if len(peek) == 0 || len(peek) == 1 && peek[0] == '-' { - return idx, false - } - if len(peek) > 1 && peek[0] == '-' && peek[1] == '-' { - return idx, true - } - peek = skipLWSPChar(peek) - // Don't have a complete line after the peek. - if bytes.IndexByte(peek, '\n') == -1 { - return idx, false - } - if len(peek) > 0 && peek[0] == '\n' { - return idx, true - } - if len(peek) > 1 && peek[0] == '\r' && peek[1] == '\n' { - return idx, true - } - return idx, false -} - // skipLWSPChar returns b with leading spaces and tabs removed. // RFC 822 defines: // LWSP-char = SPACE / HTAB |