diff options
Diffstat (limited to 'libgo/go/compress/flate/deflate.go')
-rw-r--r-- | libgo/go/compress/flate/deflate.go | 406 |
1 files changed, 290 insertions, 116 deletions
diff --git a/libgo/go/compress/flate/deflate.go b/libgo/go/compress/flate/deflate.go index 169a0c7b2e..4d6a5357d8 100644 --- a/libgo/go/compress/flate/deflate.go +++ b/libgo/go/compress/flate/deflate.go @@ -13,59 +13,81 @@ import ( const ( NoCompression = 0 BestSpeed = 1 - fastCompression = 3 BestCompression = 9 DefaultCompression = -1 - logWindowSize = 15 - windowSize = 1 << logWindowSize - windowMask = windowSize - 1 - logMaxOffsetSize = 15 // Standard DEFLATE - minMatchLength = 3 // The smallest match that the compressor looks for - maxMatchLength = 258 // The longest match for the compressor - minOffsetSize = 1 // The shortest offset that makes any sense - - // The maximum number of tokens we put into a single flat block, just to + + // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman + // entropy encoding. This mode is useful in compressing data that has + // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) + // that lacks an entropy encoder. Compression gains are achieved when + // certain bytes in the input stream occur more frequently than others. + // + // Note that HuffmanOnly produces a compressed output that is + // RFC 1951 compliant. That is, any valid DEFLATE decompressor will + // continue to be able to decompress this output. + HuffmanOnly = -2 +) + +const ( + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + + // The LZ77 step produces a sequence of literal tokens and <length, offset> + // pair tokens. The offset is also known as distance. The underlying wire + // format limits the range of lengths and offsets. For example, there are + // 256 legitimate lengths: those in the range [3, 258]. This package's + // compressor uses a higher minimum match length, enabling optimizations + // such as finding matches via 32-bit loads and compares. + baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 + minMatchLength = 4 // The smallest match length that the compressor actually emits + maxMatchLength = 258 // The largest match length + baseMatchOffset = 1 // The smallest match offset + maxMatchOffset = 1 << 15 // The largest match offset + + // The maximum number of tokens we put into a single flate block, just to // stop things from getting too large. maxFlateBlockTokens = 1 << 14 maxStoreBlockSize = 65535 - hashBits = 17 + hashBits = 17 // After 17 performance degrades hashSize = 1 << hashBits hashMask = (1 << hashBits) - 1 - hashShift = (hashBits + minMatchLength - 1) / minMatchLength maxHashOffset = 1 << 24 skipNever = math.MaxInt32 ) type compressionLevel struct { - good, lazy, nice, chain, fastSkipHashing int + level, good, lazy, nice, chain, fastSkipHashing int } var levels = []compressionLevel{ - {}, // 0 - // For levels 1-3 we don't bother trying with lazy matches - {3, 0, 8, 4, 4}, - {3, 0, 16, 8, 5}, - {3, 0, 32, 32, 6}, + {0, 0, 0, 0, 0, 0}, // NoCompression. + {1, 0, 0, 0, 0, 0}, // BestSpeed uses a custom algorithm; see deflatefast.go. + // For levels 2-3 we don't bother trying with lazy matches. + {2, 4, 0, 16, 8, 5}, + {3, 4, 0, 32, 32, 6}, // Levels 4-9 use increasingly more lazy matching // and increasingly stringent conditions for "good enough". - {4, 4, 16, 16, skipNever}, - {8, 16, 32, 32, skipNever}, - {8, 16, 128, 128, skipNever}, - {8, 32, 128, 256, skipNever}, - {32, 128, 258, 1024, skipNever}, - {32, 258, 258, 4096, skipNever}, + {4, 4, 4, 16, 16, skipNever}, + {5, 8, 16, 32, 32, skipNever}, + {6, 8, 16, 128, 128, skipNever}, + {7, 8, 32, 128, 256, skipNever}, + {8, 32, 128, 258, 1024, skipNever}, + {9, 32, 258, 258, 4096, skipNever}, } type compressor struct { compressionLevel - w *huffmanBitWriter + w *huffmanBitWriter + bulkHasher func([]byte, []uint32) // compression algorithm - fill func(*compressor, []byte) int // copy data to window - step func(*compressor) // process window - sync bool // requesting flush + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + sync bool // requesting flush + bestSpeed *deflateFast // Encoder for BestSpeed // Input hash chains // hashHead[hashValue] contains the largest inputIndex with the specified hash value @@ -73,8 +95,8 @@ type compressor struct { // hashPrev[hashHead[hashValue] & windowMask] contains the previous index // with the same hash value. chainHead int - hashHead []int - hashPrev []int + hashHead [hashSize]uint32 + hashPrev [windowSize]uint32 hashOffset int // input window: unprocessed data is window[index:windowEnd] @@ -90,9 +112,12 @@ type compressor struct { // deflate state length int offset int - hash int + hash uint32 maxInsertIndex int err error + + // hashMatch must be able to contain hashes for the maximum match length. + hashMatch [maxMatchLength - 1]uint32 } func (d *compressor) fillDeflate(b []byte) int { @@ -111,16 +136,19 @@ func (d *compressor) fillDeflate(b []byte) int { delta := d.hashOffset - 1 d.hashOffset -= delta d.chainHead -= delta - for i, v := range d.hashPrev { - if v > delta { - d.hashPrev[i] -= delta + + // Iterate over slices instead of arrays to avoid copying + // the entire table onto the stack (Issue #18625). + for i, v := range d.hashPrev[:] { + if int(v) > delta { + d.hashPrev[i] = uint32(int(v) - delta) } else { d.hashPrev[i] = 0 } } - for i, v := range d.hashHead { - if v > delta { - d.hashHead[i] -= delta + for i, v := range d.hashHead[:] { + if int(v) > delta { + d.hashHead[i] = uint32(int(v) - delta) } else { d.hashHead[i] = 0 } @@ -132,19 +160,74 @@ func (d *compressor) fillDeflate(b []byte) int { return n } -func (d *compressor) writeBlock(tokens []token, index int, eof bool) error { - if index > 0 || eof { +func (d *compressor) writeBlock(tokens []token, index int) error { + if index > 0 { var window []byte if d.blockStart <= index { window = d.window[d.blockStart:index] } d.blockStart = index - d.w.writeBlock(tokens, eof, window) + d.w.writeBlock(tokens, false, window) return d.w.err } return nil } +// fillWindow will fill the current window with the supplied +// dictionary and calculate all hashes. +// This is much faster than doing a full encode. +// Should only be used after a reset. +func (d *compressor) fillWindow(b []byte) { + // Do not fill window if we are in store-only mode. + if d.compressionLevel.level < 2 { + return + } + if d.index != 0 || d.windowEnd != 0 { + panic("internal error: fillWindow called with stale data") + } + + // If we are given too much, cut it. + if len(b) > windowSize { + b = b[len(b)-windowSize:] + } + // Add all to window. + n := copy(d.window, b) + + // Calculate 256 hashes at the time (more L1 cache hits) + loops := (n + 256 - minMatchLength) / 256 + for j := 0; j < loops; j++ { + index := j * 256 + end := index + 256 + minMatchLength - 1 + if end > n { + end = n + } + toCheck := d.window[index:end] + dstSize := len(toCheck) - minMatchLength + 1 + + if dstSize <= 0 { + continue + } + + dst := d.hashMatch[:dstSize] + d.bulkHasher(toCheck, dst) + var newH uint32 + for i, val := range dst { + di := i + index + newH = val + hh := &d.hashHead[newH&hashMask] + // Get previous value with the same hash. + // Our chain should point to the previous value. + d.hashPrev[di&windowMask] = *hh + // Set the head of the hash chain to us. + *hh = uint32(di + d.hashOffset) + } + d.hash = newH + } + // Update window information. + d.windowEnd = n + d.index = n +} + // Try to find a match starting at index whose length is greater than prevSize. // We only look at chainCount possibilities before giving up. func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { @@ -168,20 +251,15 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead tries >>= 2 } - w0 := win[pos] - w1 := win[pos+1] wEnd := win[pos+length] + wPos := win[pos:] minIndex := pos - windowSize for i := prevHead; tries > 0; tries-- { - if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] { - // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches + if wEnd == win[i+length] { + n := matchLen(win[i:], wPos, minMatchLook) - n := 3 - for pos+n < len(win) && win[i+n] == win[pos+n] { - n++ - } - if n > length && (n > 3 || pos-i <= 4096) { + if n > length && (n > minMatchLength || pos-i <= 4096) { length = n offset = pos - i ok = true @@ -196,7 +274,8 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead // hashPrev[i & windowMask] has already been overwritten, so stop now. break } - if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 { + i = int(d.hashPrev[i&windowMask]) - d.hashOffset + if i < minIndex || i < 0 { break } } @@ -211,9 +290,85 @@ func (d *compressor) writeStoredBlock(buf []byte) error { return d.w.err } +const hashmul = 0x1e35a7bd + +// hash4 returns a hash representation of the first 4 bytes +// of the supplied slice. +// The caller must ensure that len(b) >= 4. +func hash4(b []byte) uint32 { + return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits) +} + +// bulkHash4 will compute hashes using the same +// algorithm as hash4 +func bulkHash4(b []byte, dst []uint32) { + if len(b) < minMatchLength { + return + } + hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24 + dst[0] = (hb * hashmul) >> (32 - hashBits) + end := len(b) - minMatchLength + 1 + for i := 1; i < end; i++ { + hb = (hb << 8) | uint32(b[i+3]) + dst[i] = (hb * hashmul) >> (32 - hashBits) + } +} + +// matchLen returns the number of matching bytes in a and b +// up to length 'max'. Both slices must be at least 'max' +// bytes in size. +func matchLen(a, b []byte, max int) int { + a = a[:max] + b = b[:len(a)] + for i, av := range a { + if b[i] != av { + return i + } + } + return max +} + +// encSpeed will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) encSpeed() { + // We only compress if we have maxStoreBlockSize. + if d.windowEnd < maxStoreBlockSize { + if !d.sync { + return + } + + // Handle small sizes. + if d.windowEnd < 128 { + switch { + case d.windowEnd == 0: + return + case d.windowEnd <= 16: + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + default: + d.w.writeBlockHuff(false, d.window[:d.windowEnd]) + d.err = d.w.err + } + d.windowEnd = 0 + d.bestSpeed.reset() + return + } + + } + // Encode the block. + d.tokens = d.bestSpeed.encode(d.tokens[:0], d.window[:d.windowEnd]) + + // If we removed less than 1/16th, Huffman compress the block. + if len(d.tokens) > d.windowEnd-(d.windowEnd>>4) { + d.w.writeBlockHuff(false, d.window[:d.windowEnd]) + } else { + d.w.writeBlockDynamic(d.tokens, false, d.window[:d.windowEnd]) + } + d.err = d.w.err + d.windowEnd = 0 +} + func (d *compressor) initDeflate() { - d.hashHead = make([]int, hashSize) - d.hashPrev = make([]int, windowSize) d.window = make([]byte, 2*windowSize) d.hashOffset = 1 d.tokens = make([]token, 0, maxFlateBlockTokens+1) @@ -223,6 +378,7 @@ func (d *compressor) initDeflate() { d.index = 0 d.hash = 0 d.chainHead = -1 + d.bulkHasher = bulkHash4 } func (d *compressor) deflate() { @@ -232,7 +388,7 @@ func (d *compressor) deflate() { d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) if d.index < d.maxInsertIndex { - d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1]) + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) } Loop: @@ -256,7 +412,7 @@ Loop: d.byteAvailable = false } if len(d.tokens) > 0 { - if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + if d.err = d.writeBlock(d.tokens, d.index); d.err != nil { return } d.tokens = d.tokens[:0] @@ -266,10 +422,11 @@ Loop: } if d.index < d.maxInsertIndex { // Update the hash - d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask - d.chainHead = d.hashHead[d.hash] - d.hashPrev[d.index&windowMask] = d.chainHead - d.hashHead[d.hash] = d.index + d.hashOffset + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) + hh := &d.hashHead[d.hash&hashMask] + d.chainHead = int(*hh) + d.hashPrev[d.index&windowMask] = uint32(d.chainHead) + *hh = uint32(d.index + d.hashOffset) } prevLength := d.length prevOffset := d.offset @@ -293,9 +450,9 @@ Loop: // There was a match at the previous step, and the current match is // not better. Output the previous match. if d.fastSkipHashing != skipNever { - d.tokens = append(d.tokens, matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize))) + d.tokens = append(d.tokens, matchToken(uint32(d.length-baseMatchLength), uint32(d.offset-baseMatchOffset))) } else { - d.tokens = append(d.tokens, matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))) + d.tokens = append(d.tokens, matchToken(uint32(prevLength-baseMatchLength), uint32(prevOffset-baseMatchOffset))) } // Insert in the hash table all strings up to the end of the match. // index and index-1 are already inserted. If there is not enough @@ -310,12 +467,13 @@ Loop: } for d.index++; d.index < newIndex; d.index++ { if d.index < d.maxInsertIndex { - d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) // Get previous value with the same hash. // Our chain should point to the previous value. - d.hashPrev[d.index&windowMask] = d.hashHead[d.hash] + hh := &d.hashHead[d.hash&hashMask] + d.hashPrev[d.index&windowMask] = *hh // Set the head of the hash chain to us. - d.hashHead[d.hash] = d.index + d.hashOffset + *hh = uint32(d.index + d.hashOffset) } } if d.fastSkipHashing == skipNever { @@ -327,12 +485,12 @@ Loop: // item into the table. d.index += d.length if d.index < d.maxInsertIndex { - d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1])) + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) } } if len(d.tokens) == maxFlateBlockTokens { // The block includes the current character - if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + if d.err = d.writeBlock(d.tokens, d.index); d.err != nil { return } d.tokens = d.tokens[:0] @@ -345,7 +503,7 @@ Loop: } d.tokens = append(d.tokens, literalToken(uint32(d.window[i]))) if len(d.tokens) == maxFlateBlockTokens { - if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil { + if d.err = d.writeBlock(d.tokens, i+1); d.err != nil { return } d.tokens = d.tokens[:0] @@ -366,23 +524,43 @@ func (d *compressor) fillStore(b []byte) int { } func (d *compressor) store() { - if d.windowEnd > 0 { + if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + d.windowEnd = 0 } +} + +// storeHuff compresses and stores the currently added data +// when the d.window is full or we are at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeHuff() { + if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { + return + } + d.w.writeBlockHuff(false, d.window[:d.windowEnd]) + d.err = d.w.err d.windowEnd = 0 } func (d *compressor) write(b []byte) (n int, err error) { + if d.err != nil { + return 0, d.err + } n = len(b) - b = b[d.fill(d, b):] for len(b) > 0 { d.step(d) b = b[d.fill(d, b):] + if d.err != nil { + return 0, d.err + } } - return n, d.err + return n, nil } func (d *compressor) syncFlush() error { + if d.err != nil { + return d.err + } d.sync = true d.step(d) if d.err == nil { @@ -402,56 +580,53 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillStore d.step = (*compressor).store + case level == HuffmanOnly: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillStore + d.step = (*compressor).storeHuff + case level == BestSpeed: + d.compressionLevel = levels[level] + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillStore + d.step = (*compressor).encSpeed + d.bestSpeed = newDeflateFast() + d.tokens = make([]token, maxStoreBlockSize) case level == DefaultCompression: level = 6 fallthrough - case 1 <= level && level <= 9: + case 2 <= level && level <= 9: d.compressionLevel = levels[level] d.initDeflate() d.fill = (*compressor).fillDeflate d.step = (*compressor).deflate default: - return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level) + return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) } return nil } -var zeroes [32]int -var bzeroes [256]byte - func (d *compressor) reset(w io.Writer) { d.w.reset(w) d.sync = false d.err = nil - switch d.compressionLevel.chain { - case 0: - // level was NoCompression. - for i := range d.window { - d.window[i] = 0 - } + switch d.compressionLevel.level { + case NoCompression: + d.windowEnd = 0 + case BestSpeed: d.windowEnd = 0 + d.tokens = d.tokens[:0] + d.bestSpeed.reset() default: d.chainHead = -1 - for s := d.hashHead; len(s) > 0; { - n := copy(s, zeroes[:]) - s = s[n:] + for i := range d.hashHead { + d.hashHead[i] = 0 } - for s := d.hashPrev; len(s) > 0; s = s[len(zeroes):] { - copy(s, zeroes[:]) + for i := range d.hashPrev { + d.hashPrev[i] = 0 } d.hashOffset = 1 - d.index, d.windowEnd = 0, 0 - for s := d.window; len(s) > 0; { - n := copy(s, bzeroes[:]) - s = s[n:] - } d.blockStart, d.byteAvailable = 0, false - - d.tokens = d.tokens[:maxFlateBlockTokens+1] - for i := 0; i <= maxFlateBlockTokens; i++ { - d.tokens[i] = 0 - } d.tokens = d.tokens[:0] d.length = minMatchLength - 1 d.offset = 0 @@ -461,6 +636,9 @@ func (d *compressor) reset(w io.Writer) { } func (d *compressor) close() error { + if d.err != nil { + return d.err + } d.sync = true d.step(d) if d.err != nil { @@ -477,10 +655,13 @@ func (d *compressor) close() error { // Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); // higher levels typically run slower but compress more. Level 0 // (NoCompression) does not attempt any compression; it only adds the -// necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default -// compression level. +// necessary DEFLATE framing. +// Level -1 (DefaultCompression) uses the default compression level. +// Level -2 (HuffmanOnly) will use Huffman compression only, giving +// a very fast compression for all types of input, but sacrificing considerable +// compression efficiency. // -// If level is in the range [-1, 9] then the error returned will be nil. +// If level is in the range [-2, 9] then the error returned will be nil. // Otherwise the error returned will be non-nil. func NewWriter(w io.Writer, level int) (*Writer, error) { var dw Writer @@ -491,34 +672,28 @@ func NewWriter(w io.Writer, level int) (*Writer, error) { } // NewWriterDict is like NewWriter but initializes the new -// Writer with a preset dictionary. The returned Writer behaves +// Writer with a preset dictionary. The returned Writer behaves // as if the dictionary had been written to it without producing -// any compressed output. The compressed data written to w +// any compressed output. The compressed data written to w // can only be decompressed by a Reader initialized with the // same dictionary. func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { - dw := &dictWriter{w, false} + dw := &dictWriter{w} zw, err := NewWriter(dw, level) if err != nil { return nil, err } - zw.Write(dict) - zw.Flush() - dw.enabled = true + zw.d.fillWindow(dict) zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. return zw, err } type dictWriter struct { - w io.Writer - enabled bool + w io.Writer } func (w *dictWriter) Write(b []byte) (n int, err error) { - if w.enabled { - return w.w.Write(b) - } - return len(b), nil + return w.w.Write(b) } // A Writer takes data written to it and writes the compressed @@ -534,10 +709,12 @@ func (w *Writer) Write(data []byte) (n int, err error) { return w.d.write(data) } -// Flush flushes any pending compressed data to the underlying writer. +// Flush flushes any pending data to the underlying writer. // It is useful mainly in compressed network protocols, to ensure that // a remote reader has enough data to reconstruct a packet. // Flush does not return until the data has been written. +// Calling Flush when there is no pending data still causes the Writer +// to emit a sync marker of at least 4 bytes. // If the underlying writer returns an error, Flush returns that error. // // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. @@ -556,14 +733,11 @@ func (w *Writer) Close() error { // the result of NewWriter or NewWriterDict called with dst // and w's level and dictionary. func (w *Writer) Reset(dst io.Writer) { - if dw, ok := w.d.w.w.(*dictWriter); ok { + if dw, ok := w.d.w.writer.(*dictWriter); ok { // w was created with NewWriterDict dw.w = dst w.d.reset(dw) - dw.enabled = false - w.Write(w.dict) - w.Flush() - dw.enabled = true + w.d.fillWindow(w.dict) } else { // w was created with NewWriter w.d.reset(dst) |