inflate: Read more bits when decoding (#232)
When a block is not marked as the final block, it is safe to read 10 further bits ahead without breaking the promise of never over-reading past the end of the DEFLATE stream.

Fixes #231

Also, blocks that contain only EOF are now written as TYPE 01 (predefined tables) with only an EOB literal. This saves 3-4 bytes at no cost (see the size sketch below).
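
As a rough size sketch (my own arithmetic, not code from this commit; the only figure taken from the new code comments is "10 bits instead of 5 bytes"):

```go
package sketch

// Stored (BTYPE=00) EOF-only block: 3 header bits, padding to the next byte
// boundary, then LEN and NLEN (2 bytes each) - about 5 bytes in total.
// Fixed-Huffman (BTYPE=01) EOF-only block: 3 header bits plus the 7-bit EOB
// code = 10 bits, at most 2 bytes once flushed - hence the 3-4 byte saving.
const (
	storedEOFBits = 3 + 5 + 16 + 16 // header + worst-case padding + LEN + NLEN
	fixedEOFBits  = 3 + 7           // header + fixed-table EOB code
)
```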

The smallest possible block is a predefined-table block containing only a single EOB, which is 10 bits, on top of the EOB of the current block (the current limit).

This makes it possible to fill more bits at a time when decoding.
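
As an illustration, here is a simplified sketch (my own, not the code in this commit) of the refill step inside a Huffman decode loop; the real loop is in huffSym in flate/inflate.go:

```go
package sketch

import "io"

// fillBits tops up the bit buffer b (holding nb valid bits) until it holds at
// least maxRead bits. A larger maxRead for non-final blocks lets this loop
// pull in more input per symbol while still never reading past the end of the
// DEFLATE stream.
func fillBits(br io.ByteReader, b uint32, nb, maxRead uint) (uint32, uint, error) {
	for nb < maxRead {
		c, err := br.ReadByte()
		if err != nil {
			return b, nb, err
		}
		b |= uint32(c) << (nb & 31)
		nb += 8
	}
	return b, nb, nil
}
```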

```
λ benchcmp old.txt new.txt
benchmark                       old ns/op     new ns/op     delta
BenchmarkGunzipCopy-12          27830317      26617762      -4.36%
BenchmarkGunzipNoWriteTo-12     27878505      26705660      -4.21%

benchmark                       old MB/s     new MB/s     speedup
BenchmarkGunzipCopy-12          171.50       179.31       1.05x
BenchmarkGunzipNoWriteTo-12     171.20       178.72       1.04x
```
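
For context, this is roughly the shape of such a gunzip benchmark (a hypothetical sketch to be placed in a _test.go file; the actual BenchmarkGunzipCopy in this repository may differ):

```go
package sketch

import (
	"bytes"
	"io"
	"io/ioutil"
	"testing"

	"github.com/klauspost/compress/gzip"
)

// BenchmarkGunzipCopySketch compresses a payload once, then measures
// decompression throughput by copying the gunzip stream to a discard writer.
func BenchmarkGunzipCopySketch(b *testing.B) {
	payload := bytes.Repeat([]byte("hello flate "), 1<<15)

	var compressed bytes.Buffer
	zw := gzip.NewWriter(&compressed)
	if _, err := zw.Write(payload); err != nil {
		b.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		b.Fatal(err)
	}

	b.SetBytes(int64(len(payload)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		zr, err := gzip.NewReader(bytes.NewReader(compressed.Bytes()))
		if err != nil {
			b.Fatal(err)
		}
		if _, err := io.Copy(ioutil.Discard, zr); err != nil {
			b.Fatal(err)
		}
	}
}
```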
klauspost committed Feb 20, 2020
1 parent 5a1fd75 commit 60e4844
Showing 4 changed files with 66 additions and 35 deletions.
34 changes: 17 additions & 17 deletions flate/deflate_test.go
@@ -33,24 +33,24 @@ type reverseBitsTest struct {
}

var deflateTests = []*deflateTest{
{[]byte{}, 0, []byte{1, 0, 0, 255, 255}},
{[]byte{0x11}, BestCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{0x11}, BestCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{0x11}, BestCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{}, 0, []byte{0x3, 0x0}},
{[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}},
{[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}},
{[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}},

{[]byte{0x11}, 0, []byte{0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255}},
{[]byte{0x11, 0x12}, 0, []byte{0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255}},
{[]byte{0x11}, 0, []byte{0x0, 0x1, 0x0, 0xfe, 0xff, 0x11, 0x3, 0x0}},
{[]byte{0x11, 0x12}, 0, []byte{0x0, 0x2, 0x0, 0xfd, 0xff, 0x11, 0x12, 0x3, 0x0}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 0,
[]byte{0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255},
[]byte{0x0, 0x8, 0x0, 0xf7, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x3, 0x0},
},
{[]byte{}, 1, []byte{1, 0, 0, 255, 255}},
{[]byte{0x11}, BestCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{0x11, 0x12}, BestCompression, []byte{18, 20, 2, 4, 0, 0, 255, 255}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, BestCompression, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}},
{[]byte{}, 9, []byte{1, 0, 0, 255, 255}},
{[]byte{0x11}, 9, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{0x11, 0x12}, 9, []byte{18, 20, 2, 4, 0, 0, 255, 255}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}},
{[]byte{}, 1, []byte{0x3, 0x0}},
{[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}},
{[]byte{0x11, 0x12}, BestCompression, []byte{0x12, 0x14, 0x2, 0xc, 0x0}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, BestCompression, []byte{0x12, 0x84, 0x2, 0xc0, 0x0}},
{[]byte{}, 9, []byte{0x3, 0x0}},
{[]byte{0x11}, 9, []byte{0x12, 0x4, 0xc, 0x0}},
{[]byte{0x11, 0x12}, 9, []byte{0x12, 0x14, 0x2, 0xc, 0x0}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{0x12, 0x84, 0x2, 0xc0, 0x0}},
}

var deflateInflateTests = []*deflateInflateTest{
@@ -110,7 +110,7 @@ func TestBulkHash4(t *testing.T) {
}

func TestDeflate(t *testing.T) {
for _, h := range deflateTests {
for i, h := range deflateTests {
var buf bytes.Buffer
w, err := NewWriter(&buf, h.level)
if err != nil {
@@ -120,7 +120,7 @@ func TestDeflate(t *testing.T) {
w.Write(h.in)
w.Close()
if !bytes.Equal(buf.Bytes(), h.out) {
t.Errorf("Deflate(%d, %x) = \n%#v, want \n%#v", h.level, h.in, buf.Bytes(), h.out)
t.Errorf("%d: Deflate(%d, %x) = \n%#v, want \n%#v", i, h.level, h.in, buf.Bytes(), h.out)
}
}
}
13 changes: 13 additions & 0 deletions flate/huffman_bit_writer.go
@@ -484,6 +484,9 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
}
}

// writeStoredHeader will write a stored header.
// If the stored block is only used for EOF,
// it is replaced with a fixed huffman block.
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
if w.err != nil {
return
@@ -493,6 +496,16 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}

// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
if length == 0 && isEof {
w.writeFixedHeader(isEof)
// EOB: 7 bits, value: 0
w.writeBits(0, 7)
w.flush()
return
}

var flag int32
if isEof {
flag = 1
4 changes: 2 additions & 2 deletions flate/huffman_code.go
@@ -109,8 +109,8 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
return h
}

var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
var fixedLiteralEncoding = generateFixedLiteralEncoding()
var fixedOffsetEncoding = generateFixedOffsetEncoding()

func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
50 changes: 34 additions & 16 deletions flate/inflate.go
@@ -106,7 +106,7 @@ (
)

type huffmanDecoder struct {
min int // the minimum code length
maxRead int // the maximum number of bits we can read and not overread
chunks *[huffmanNumChunks]uint16 // chunks as described above
links [][]uint16 // overflow links
linkMask uint32 // mask the width of the link table
@@ -126,12 +126,12 @@ func (h *huffmanDecoder) init(lengths []int) bool {
if h.chunks == nil {
h.chunks = &[huffmanNumChunks]uint16{}
}
if h.min != 0 {
if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}

// Count number of codes of each length,
// compute min and max length.
// compute maxRead and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
@@ -178,7 +178,7 @@
return false
}

h.min = min
h.maxRead = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
@@ -543,12 +543,18 @@ func (f *decompressor) readHuffman() error {
return CorruptInputError(f.roffset)
}

// As an optimization, we can initialize the min bits to read at a time
// As an optimization, we can initialize the maxRead bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.min < f.bits[endBlockMarker] {
f.h1.min = f.bits[endBlockMarker]
if f.h1.maxRead < f.bits[endBlockMarker] {
f.h1.maxRead = f.bits[endBlockMarker]
}
if !f.final {
// If not the final block, the smallest block possible is
// a predefined table, BTYPE=01, with a single EOB marker.
// This will take up 3 + 7 bits.
f.h1.maxRead += 10
}

return nil
@@ -726,21 +732,33 @@ copyHistory:
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
f.nb = 0
f.b = 0
left := (f.nb) & 7
f.nb -= left
f.b >>= left

offBytes := f.nb >> 3
// Unfilled values will be overwritten.
f.buf[0] = uint8(f.b)
f.buf[1] = uint8(f.b >> 8)
f.buf[2] = uint8(f.b >> 16)
f.buf[3] = uint8(f.b >> 24)

f.roffset += int64(offBytes)
f.nb, f.b = 0, 0

// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[0:4])
nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
if nn != ^n {
if debugDecode {
fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
ncomp := ^n
fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
}
f.err = CorruptInputError(f.roffset)
return
@@ -752,7 +770,7 @@ func (f *decompressor) dataBlock() {
return
}

f.copyLen = n
f.copyLen = int(n)
f.copyData()
}

@@ -816,7 +834,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(h.min)
n := uint(h.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
