summaryrefslogtreecommitdiff
path: root/src/compress
diff options
context:
space:
mode:
authorJames Robinson <jamesr@google.com>2014-10-20 12:58:02 +1100
committerJames Robinson <jamesr@google.com>2014-10-20 12:58:02 +1100
commit0dc0064f5b5db4c8197d6b20911ce73160417369 (patch)
treee0c0436a8e9222a657efe064d5ad780f7c958241 /src/compress
parent4a8829b092659039c4bb876c587524d10ae88b14 (diff)
downloadgo-0dc0064f5b5db4c8197d6b20911ce73160417369.tar.gz
compress/flate: add Reset() to allow reusing large buffers to compress multiple buffers
This adds a Reset() to compress/flate's decompressor and plumbs that through to compress/zlib and compress/gzip's Readers so callers can avoid large allocations when performing many inflate operations. In particular this preserves the allocation of the decompressor.hist buffer, which is 32kb and overwritten as needed while inflating. On the benchmark described in issue 6317, produces the following speedup on my 2.3ghz Intel Core i7 MBP with go version devel +6b696a34e0af Sun Aug 03 15:14:59 2014 -0700 darwin/amd64: blocked.text w/out patch vs blocked.text w/ patch: benchmark old ns/op new ns/op delta BenchmarkGunzip 8371577533 7927917687 -5.30% benchmark old allocs new allocs delta BenchmarkGunzip 176818 148519 -16.00% benchmark old bytes new bytes delta BenchmarkGunzip 292184936 12739528 -95.64% flat.text vs blocked.text w/patch: benchmark old ns/op new ns/op delta BenchmarkGunzip 7939447827 7927917687 -0.15% benchmark old allocs new allocs delta BenchmarkGunzip 90702 148519 +63.74% benchmark old bytes new bytes delta BenchmarkGunzip 9959528 12739528 +27.91% Similar speedups to those bradfitz saw in https://codereview.appspot.com/13416045. Fixes issue 6317. Fixes issue 7950. LGTM=nigeltao R=golang-codereviews, bradfitz, dan.kortschak, adg, nigeltao, jamesr CC=golang-codereviews https://codereview.appspot.com/97140043 Committer: Nigel Tao <nigeltao@golang.org>
Diffstat (limited to 'src/compress')
-rw-r--r--src/compress/flate/inflate.go27
-rw-r--r--src/compress/flate/inflate_test.go39
-rw-r--r--src/compress/gzip/gunzip.go6
-rw-r--r--src/compress/zlib/reader.go80
4 files changed, 125 insertions, 27 deletions
diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go
index a7fe94c50..76519bbf4 100644
--- a/src/compress/flate/inflate.go
+++ b/src/compress/flate/inflate.go
@@ -56,6 +56,15 @@ func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
+// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
+// to switch to a new underlying Reader. This permits reusing a ReadCloser
+// instead of allocating a new one.
+type Resetter interface {
+ // Reset discards any buffered data and resets the Resetter as if it was
+ // newly initialized with the given reader.
+ Reset(r io.Reader, dict []byte) error
+}
+
// Note that much of the implementation of huffmanDecoder is also copied
// into gen.go (in package main) for the purpose of precomputing the
// fixed huffman tables so they can be included statically.
@@ -679,12 +688,28 @@ func makeReader(r io.Reader) Reader {
return bufio.NewReader(r)
}
+func (f *decompressor) Reset(r io.Reader, dict []byte) error {
+ *f = decompressor{
+ r: makeReader(r),
+ bits: f.bits,
+ codebits: f.codebits,
+ hist: f.hist,
+ step: (*decompressor).nextBlock,
+ }
+ if dict != nil {
+ f.setDict(dict)
+ }
+ return nil
+}
+
// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser
// when finished reading.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) io.ReadCloser {
var f decompressor
f.bits = new([maxLit + maxDist]int)
@@ -700,6 +725,8 @@ func NewReader(r io.Reader) io.ReadCloser {
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
var f decompressor
f.r = makeReader(r)
diff --git a/src/compress/flate/inflate_test.go b/src/compress/flate/inflate_test.go
new file mode 100644
index 000000000..9f25d30b3
--- /dev/null
+++ b/src/compress/flate/inflate_test.go
@@ -0,0 +1,39 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+ "bytes"
+ "io"
+ "testing"
+)
+
+func TestReset(t *testing.T) {
+ ss := []string{
+ "lorem ipsum izzle fo rizzle",
+ "the quick brown fox jumped over",
+ }
+
+ deflated := make([]bytes.Buffer, 2)
+ for i, s := range ss {
+ w, _ := NewWriter(&deflated[i], 1)
+ w.Write([]byte(s))
+ w.Close()
+ }
+
+ inflated := make([]bytes.Buffer, 2)
+
+ f := NewReader(&deflated[0])
+ io.Copy(&inflated[0], f)
+ f.(Resetter).Reset(&deflated[1], nil)
+ io.Copy(&inflated[1], f)
+ f.Close()
+
+ for i, s := range ss {
+ if s != inflated[i].String() {
+ t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i], s)
+ }
+ }
+}
diff --git a/src/compress/gzip/gunzip.go b/src/compress/gzip/gunzip.go
index fc08c7a48..df1d5aa2b 100644
--- a/src/compress/gzip/gunzip.go
+++ b/src/compress/gzip/gunzip.go
@@ -208,7 +208,11 @@ func (z *Reader) readHeader(save bool) error {
}
z.digest.Reset()
- z.decompressor = flate.NewReader(z.r)
+ if z.decompressor == nil {
+ z.decompressor = flate.NewReader(z.r)
+ } else {
+ z.decompressor.(flate.Resetter).Reset(z.r, nil)
+ }
return nil
}
diff --git a/src/compress/zlib/reader.go b/src/compress/zlib/reader.go
index e1191816d..816f1bf6b 100644
--- a/src/compress/zlib/reader.go
+++ b/src/compress/zlib/reader.go
@@ -51,10 +51,21 @@ type reader struct {
scratch [4]byte
}
-// NewReader creates a new io.ReadCloser.
-// Reads from the returned io.ReadCloser read and decompress data from r.
+// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
+// to switch to a new underlying Reader. This permits reusing a ReadCloser
+// instead of allocating a new one.
+type Resetter interface {
+ // Reset discards any buffered data and resets the Resetter as if it was
+ // newly initialized with the given reader.
+ Reset(r io.Reader, dict []byte) error
+}
+
+// NewReader creates a new ReadCloser.
+// Reads from the returned ReadCloser read and decompress data from r.
// The implementation buffers input and may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser when done.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) (io.ReadCloser, error) {
return NewReaderDict(r, nil)
}
@@ -62,35 +73,14 @@ func NewReader(r io.Reader) (io.ReadCloser, error) {
// NewReaderDict is like NewReader but uses a preset dictionary.
// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
// If the compressed data refers to a different dictionary, NewReaderDict returns ErrDictionary.
+//
+// The ReadCloser returned by NewReaderDict also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
z := new(reader)
- if fr, ok := r.(flate.Reader); ok {
- z.r = fr
- } else {
- z.r = bufio.NewReader(r)
- }
- _, err := io.ReadFull(z.r, z.scratch[0:2])
+ err := z.Reset(r, dict)
if err != nil {
return nil, err
}
- h := uint(z.scratch[0])<<8 | uint(z.scratch[1])
- if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) {
- return nil, ErrHeader
- }
- if z.scratch[1]&0x20 != 0 {
- _, err = io.ReadFull(z.r, z.scratch[0:4])
- if err != nil {
- return nil, err
- }
- checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
- if checksum != adler32.Checksum(dict) {
- return nil, ErrDictionary
- }
- z.decompressor = flate.NewReaderDict(z.r, dict)
- } else {
- z.decompressor = flate.NewReader(z.r)
- }
- z.digest = adler32.New()
return z, nil
}
@@ -131,3 +121,41 @@ func (z *reader) Close() error {
z.err = z.decompressor.Close()
return z.err
}
+
+func (z *reader) Reset(r io.Reader, dict []byte) error {
+ if fr, ok := r.(flate.Reader); ok {
+ z.r = fr
+ } else {
+ z.r = bufio.NewReader(r)
+ }
+ _, err := io.ReadFull(z.r, z.scratch[0:2])
+ if err != nil {
+ return err
+ }
+ h := uint(z.scratch[0])<<8 | uint(z.scratch[1])
+ if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) {
+ return ErrHeader
+ }
+ haveDict := z.scratch[1]&0x20 != 0
+ if haveDict {
+ _, err = io.ReadFull(z.r, z.scratch[0:4])
+ if err != nil {
+ return err
+ }
+ checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
+ if checksum != adler32.Checksum(dict) {
+ return ErrDictionary
+ }
+ }
+ if z.decompressor == nil {
+ if haveDict {
+ z.decompressor = flate.NewReaderDict(z.r, dict)
+ } else {
+ z.decompressor = flate.NewReader(z.r)
+ }
+ } else {
+ z.decompressor.(flate.Resetter).Reset(z.r, dict)
+ }
+ z.digest = adler32.New()
+ return nil
+}