// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. //go:generate go run gen.go -output fixedhuff.go // Package flate implements the DEFLATE compressed data format, described in // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file // formats. package flate import ( "bufio" "io" "strconv" ) const ( maxCodeLen = 16 // max length of Huffman code maxHist = 32768 // max history required // The next three numbers come from the RFC, section 3.2.7. maxLit = 286 maxDist = 32 numCodes = 19 // number of codes in Huffman meta-code ) // A CorruptInputError reports the presence of corrupt input at a given offset. type CorruptInputError int64 func (e CorruptInputError) Error() string { return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) } // An InternalError reports an error in the flate code itself. type InternalError string func (e InternalError) Error() string { return "flate: internal error: " + string(e) } // A ReadError reports an error encountered while reading input. type ReadError struct { Offset int64 // byte offset where error occurred Err error // error returned by underlying Read } func (e *ReadError) Error() string { return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() } // A WriteError reports an error encountered while writing output. type WriteError struct { Offset int64 // byte offset where error occurred Err error // error returned by underlying Write } func (e *WriteError) Error() string { return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() } // Note that much of the implementation of huffmanDecoder is also copied // into gen.go (in package main) for the purpose of precomputing the // fixed huffman tables so they can be included statically. // The data structure for decoding Huffman tables is based on that of // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), // For codes smaller than the table width, there are multiple entries // (each combination of trailing bits has the same value). For codes // larger than the table width, the table contains a link to an overflow // table. The width of each entry in the link table is the maximum code // size minus the chunk width. // Note that you can do a lookup in the table even without all bits // filled. Since the extra bits are zero, and the DEFLATE Huffman codes // have the property that shorter codes come before longer ones, the // bit length estimate in the result is a lower bound on the actual // number of bits. // chunk & 15 is number of bits // chunk >> 4 is value, including table link const ( huffmanChunkBits = 9 huffmanNumChunks = 1 << huffmanChunkBits huffmanCountMask = 15 huffmanValueShift = 4 ) type huffmanDecoder struct { min int // the minimum code length chunks [huffmanNumChunks]uint32 // chunks as described above links [][]uint32 // overflow links linkMask uint32 // mask the width of the link table } // Initialize Huffman decoding tables from array of code lengths. func (h *huffmanDecoder) init(bits []int) bool { if h.min != 0 { *h = huffmanDecoder{} } // Count number of codes of each length, // compute min and max length. var count [maxCodeLen]int var min, max int for _, n := range bits { if n == 0 { continue } if min == 0 || n < min { min = n } if n > max { max = n } count[n]++ } if max == 0 { return false } h.min = min var linkBits uint var numLinks int if max > huffmanChunkBits { linkBits = uint(max) - huffmanChunkBits numLinks = 1 << linkBits h.linkMask = uint32(numLinks - 1) } code := 0 var nextcode [maxCodeLen]int for i := min; i <= max; i++ { if i == huffmanChunkBits+1 { // create link tables link := code >> 1 if huffmanNumChunks < link { return false } h.links = make([][]uint32, huffmanNumChunks-link) for j := uint(link); j < huffmanNumChunks; j++ { reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 reverse >>= uint(16 - huffmanChunkBits) off := j - uint(link) h.chunks[reverse] = uint32(off<>8]) | int(reverseByte[code&0xff])<<8 reverse >>= uint(16 - n) if n <= huffmanChunkBits { for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { h.chunks[off] = chunk } } else { value := h.chunks[reverse&(huffmanNumChunks-1)] >> huffmanValueShift if value >= uint32(len(h.links)) { return false } linktab := h.links[value] reverse >>= huffmanChunkBits for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { linktab[off] = chunk } } } return true } // The actual read interface needed by NewReader. // If the passed in io.Reader does not also have ReadByte, // the NewReader will introduce its own buffering. type Reader interface { io.Reader io.ByteReader } // Decompress state. type decompressor struct { // Input source. r Reader roffset int64 woffset int64 // Input bits, in top of b. b uint32 nb uint // Huffman decoders for literal/length, distance. h1, h2 huffmanDecoder // Length arrays used to define Huffman codes. bits *[maxLit + maxDist]int codebits *[numCodes]int // Output history, buffer. hist *[maxHist]byte hp int // current output position in buffer hw int // have written hist[0:hw] already hfull bool // buffer has filled at least once // Temporary buffer (avoids repeated allocation). buf [4]byte // Next step in the decompression, // and decompression state. step func(*decompressor) final bool err error toRead []byte hl, hd *huffmanDecoder copyLen int copyDist int } func (f *decompressor) nextBlock() { if f.final { if f.hw != f.hp { f.flush((*decompressor).nextBlock) return } f.err = io.EOF return } for f.nb < 1+2 { if f.err = f.moreBits(); f.err != nil { return } } f.final = f.b&1 == 1 f.b >>= 1 typ := f.b & 3 f.b >>= 2 f.nb -= 1 + 2 switch typ { case 0: f.dataBlock() case 1: // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil f.huffmanBlock() case 2: // compressed, dynamic Huffman tables if f.err = f.readHuffman(); f.err != nil { break } f.hl = &f.h1 f.hd = &f.h2 f.huffmanBlock() default: // 3 is reserved. f.err = CorruptInputError(f.roffset) } } func (f *decompressor) Read(b []byte) (int, error) { for { if len(f.toRead) > 0 { n := copy(b, f.toRead) f.toRead = f.toRead[n:] return n, nil } if f.err != nil { return 0, f.err } f.step(f) } } func (f *decompressor) Close() error { if f.err == io.EOF { return nil } return f.err } // RFC 1951 section 3.2.7. // Compression with dynamic Huffman codes var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} func (f *decompressor) readHuffman() error { // HLIT[5], HDIST[5], HCLEN[4]. for f.nb < 5+5+4 { if err := f.moreBits(); err != nil { return err } } nlit := int(f.b&0x1F) + 257 if nlit > maxLit { return CorruptInputError(f.roffset) } f.b >>= 5 ndist := int(f.b&0x1F) + 1 // maxDist is 32, so ndist is always valid. f.b >>= 5 nclen := int(f.b&0xF) + 4 // numCodes is 19, so nclen is always valid. f.b >>= 4 f.nb -= 5 + 5 + 4 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. for i := 0; i < nclen; i++ { for f.nb < 3 { if err := f.moreBits(); err != nil { return err } } f.codebits[codeOrder[i]] = int(f.b & 0x7) f.b >>= 3 f.nb -= 3 } for i := nclen; i < len(codeOrder); i++ { f.codebits[codeOrder[i]] = 0 } if !f.h1.init(f.codebits[0:]) { return CorruptInputError(f.roffset) } // HLIT + 257 code lengths, HDIST + 1 code lengths, // using the code length Huffman code. for i, n := 0, nlit+ndist; i < n; { x, err := f.huffSym(&f.h1) if err != nil { return err } if x < 16 { // Actual length. f.bits[i] = x i++ continue } // Repeat previous length or zero. var rep int var nb uint var b int switch x { default: return InternalError("unexpected length code") case 16: rep = 3 nb = 2 if i == 0 { return CorruptInputError(f.roffset) } b = f.bits[i-1] case 17: rep = 3 nb = 3 b = 0 case 18: rep = 11 nb = 7 b = 0 } for f.nb < nb { if err := f.moreBits(); err != nil { return err } } rep += int(f.b & uint32(1<>= nb f.nb -= nb if i+rep > n { return CorruptInputError(f.roffset) } for j := 0; j < rep; j++ { f.bits[i] = b i++ } } if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { return CorruptInputError(f.roffset) } return nil } // Decode a single Huffman block from f. // hl and hd are the Huffman states for the lit/length values // and the distance values, respectively. If hd == nil, using the // fixed distance encoding associated with fixed Huffman blocks. func (f *decompressor) huffmanBlock() { for { v, err := f.huffSym(f.hl) if err != nil { f.err = err return } var n uint // number of bits extra var length int switch { case v < 256: f.hist[f.hp] = byte(v) f.hp++ if f.hp == len(f.hist) { // After the flush, continue this loop. f.flush((*decompressor).huffmanBlock) return } continue case v == 256: // Done with huffman block; read next block. f.step = (*decompressor).nextBlock return // otherwise, reference to older data case v < 265: length = v - (257 - 3) n = 0 case v < 269: length = v*2 - (265*2 - 11) n = 1 case v < 273: length = v*4 - (269*4 - 19) n = 2 case v < 277: length = v*8 - (273*8 - 35) n = 3 case v < 281: length = v*16 - (277*16 - 67) n = 4 case v < 285: length = v*32 - (281*32 - 131) n = 5 default: length = 258 n = 0 } if n > 0 { for f.nb < n { if err = f.moreBits(); err != nil { f.err = err return } } length += int(f.b & uint32(1<>= n f.nb -= n } var dist int if f.hd == nil { for f.nb < 5 { if err = f.moreBits(); err != nil { f.err = err return } } dist = int(reverseByte[(f.b&0x1F)<<3]) f.b >>= 5 f.nb -= 5 } else { if dist, err = f.huffSym(f.hd); err != nil { f.err = err return } } switch { case dist < 4: dist++ case dist >= 30: f.err = CorruptInputError(f.roffset) return default: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << nb for f.nb < nb { if err = f.moreBits(); err != nil { f.err = err return } } extra |= int(f.b & uint32(1<>= nb f.nb -= nb dist = 1<<(nb+1) + 1 + extra } // Copy history[-dist:-dist+length] into output. if dist > len(f.hist) { f.err = InternalError("bad history distance") return } // No check on length; encoding can be prescient. if !f.hfull && dist > f.hp { f.err = CorruptInputError(f.roffset) return } f.copyLen, f.copyDist = length, dist if f.copyHist() { return } } } // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself. // It reports whether the f.hist buffer is full. func (f *decompressor) copyHist() bool { p := f.hp - f.copyDist if p < 0 { p += len(f.hist) } for f.copyLen > 0 { n := f.copyLen if x := len(f.hist) - f.hp; n > x { n = x } if x := len(f.hist) - p; n > x { n = x } forwardCopy(f.hist[:], f.hp, p, n) p += n f.hp += n f.copyLen -= n if f.hp == len(f.hist) { // After flush continue copying out of history. f.flush((*decompressor).copyHuff) return true } if p == len(f.hist) { p = 0 } } return false } func (f *decompressor) copyHuff() { if f.copyHist() { return } f.huffmanBlock() } // Copy a single uncompressed data block from input to output. func (f *decompressor) dataBlock() { // Uncompressed. // Discard current half-byte. f.nb = 0 f.b = 0 // Length then ones-complement of length. nr, err := io.ReadFull(f.r, f.buf[0:4]) f.roffset += int64(nr) if err != nil { f.err = &ReadError{f.roffset, err} return } n := int(f.buf[0]) | int(f.buf[1])<<8 nn := int(f.buf[2]) | int(f.buf[3])<<8 if uint16(nn) != uint16(^n) { f.err = CorruptInputError(f.roffset) return } if n == 0 { // 0-length block means sync f.flush((*decompressor).nextBlock) return } f.copyLen = n f.copyData() } // copyData copies f.copyLen bytes from the underlying reader into f.hist. // It pauses for reads when f.hist is full. func (f *decompressor) copyData() { n := f.copyLen for n > 0 { m := len(f.hist) - f.hp if m > n { m = n } m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) f.roffset += int64(m) if err != nil { f.err = &ReadError{f.roffset, err} return } n -= m f.hp += m if f.hp == len(f.hist) { f.copyLen = n f.flush((*decompressor).copyData) return } } f.step = (*decompressor).nextBlock } func (f *decompressor) setDict(dict []byte) { if len(dict) > len(f.hist) { // Will only remember the tail. dict = dict[len(dict)-len(f.hist):] } f.hp = copy(f.hist[:], dict) if f.hp == len(f.hist) { f.hp = 0 f.hfull = true } f.hw = f.hp } func (f *decompressor) moreBits() error { c, err := f.r.ReadByte() if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } return err } f.roffset++ f.b |= uint32(c) << f.nb f.nb += 8 return nil } // Read the next Huffman-encoded symbol from f according to h. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { n := uint(h.min) for { for f.nb < n { if err := f.moreBits(); err != nil { return 0, err } } chunk := h.chunks[f.b&(huffmanNumChunks-1)] n = uint(chunk & huffmanCountMask) if n > huffmanChunkBits { chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask] n = uint(chunk & huffmanCountMask) if n == 0 { f.err = CorruptInputError(f.roffset) return 0, f.err } } if n <= f.nb { f.b >>= n f.nb -= n return int(chunk >> huffmanValueShift), nil } } } // Flush any buffered output to the underlying writer. func (f *decompressor) flush(step func(*decompressor)) { f.toRead = f.hist[f.hw:f.hp] f.woffset += int64(f.hp - f.hw) f.hw = f.hp if f.hp == len(f.hist) { f.hp = 0 f.hw = 0 f.hfull = true } f.step = step } func makeReader(r io.Reader) Reader { if rr, ok := r.(Reader); ok { return rr } return bufio.NewReader(r) } // NewReader returns a new ReadCloser that can be used // to read the uncompressed version of r. It is the caller's // responsibility to call Close on the ReadCloser when // finished reading. func NewReader(r io.Reader) io.ReadCloser { var f decompressor f.bits = new([maxLit + maxDist]int) f.codebits = new([numCodes]int) f.r = makeReader(r) f.hist = new([maxHist]byte) f.step = (*decompressor).nextBlock return &f } // NewReaderDict is like NewReader but initializes the reader // with a preset dictionary. The returned Reader behaves as if // the uncompressed data stream started with the given dictionary, // which has already been read. NewReaderDict is typically used // to read data compressed by NewWriterDict. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { var f decompressor f.r = makeReader(r) f.hist = new([maxHist]byte) f.bits = new([maxLit + maxDist]int) f.codebits = new([numCodes]int) f.step = (*decompressor).nextBlock f.setDict(dict) return &f }