From 0d728f0a6c697c5cd7939be56ca00ee6e1bdfab9 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sun, 23 Feb 2020 19:34:28 +0100 Subject: [PATCH] Reduce deflate table sizes (#227) * Reduce deflate table sizes Fixes #223 After: ``` BenchmarkCompressAllocations/level(-2)/flate-12 15889 75134 ns/op 342272 B/op 11 allocs/op BenchmarkCompressAllocations/level(-2)/gzip-12 15424 76300 ns/op 342448 B/op 12 allocs/op BenchmarkCompressAllocations/level(-1)/flate-12 2673 378711 ns/op 2448774 B/op 14 allocs/op BenchmarkCompressAllocations/level(-1)/gzip-12 3342 377507 ns/op 2448949 B/op 15 allocs/op BenchmarkCompressAllocations/level(0)/flate-12 17437 76986 ns/op 339968 B/op 9 allocs/op BenchmarkCompressAllocations/level(0)/gzip-12 15076 82031 ns/op 340144 B/op 10 allocs/op BenchmarkCompressAllocations/level(1)/flate-12 3382 377466 ns/op 1924486 B/op 14 allocs/op BenchmarkCompressAllocations/level(1)/gzip-12 3436 387788 ns/op 1924662 B/op 15 allocs/op BenchmarkCompressAllocations/level(2)/flate-12 1971 591518 ns/op 2710923 B/op 14 allocs/op BenchmarkCompressAllocations/level(2)/gzip-12 2073 516709 ns/op 2711102 B/op 15 allocs/op BenchmarkCompressAllocations/level(3)/flate-12 3250 426246 ns/op 2186626 B/op 14 allocs/op BenchmarkCompressAllocations/level(3)/gzip-12 3084 420084 ns/op 2186802 B/op 15 allocs/op BenchmarkCompressAllocations/level(4)/flate-12 2733 390467 ns/op 2186626 B/op 14 allocs/op BenchmarkCompressAllocations/level(4)/gzip-12 3165 400509 ns/op 2186802 B/op 15 allocs/op BenchmarkCompressAllocations/level(5)/flate-12 2797 417904 ns/op 2448774 B/op 14 allocs/op BenchmarkCompressAllocations/level(5)/gzip-12 2455 456214 ns/op 2448948 B/op 15 allocs/op BenchmarkCompressAllocations/level(6)/flate-12 2733 471116 ns/op 2448773 B/op 14 allocs/op BenchmarkCompressAllocations/level(6)/gzip-12 2673 443633 ns/op 2448949 B/op 15 allocs/op BenchmarkCompressAllocations/level(7)/flate-12 6015 198306 ns/op 1006979 B/op 13 allocs/op BenchmarkCompressAllocations/level(7)/gzip-12 5728 188045 ns/op 1007155 B/op 14 allocs/op BenchmarkCompressAllocations/level(8)/flate-12 6684 195617 ns/op 1006979 B/op 13 allocs/op BenchmarkCompressAllocations/level(8)/gzip-12 6331 193922 ns/op 1007155 B/op 14 allocs/op BenchmarkCompressAllocations/level(9)/flate-12 6015 193829 ns/op 1006980 B/op 13 allocs/op BenchmarkCompressAllocations/level(9)/gzip-12 5728 197447 ns/op 1007155 B/op 14 allocs/op ``` * Reduce bits and history buffer. --- compressible_test.go | 29 +++++++++++++++++++++++ flate/deflate.go | 4 +++- flate/fast_encoder.go | 7 +++--- flate/level1.go | 22 +++++++++--------- flate/level2.go | 28 +++++++++++----------- flate/level3.go | 54 +++++++++++++++++++++---------------------- flate/level4.go | 32 ++++++++++++------------- flate/level5.go | 46 ++++++++++++++++++------------------ flate/level6.go | 36 ++++++++++++++--------------- flate/token.go | 4 ++-- 10 files changed, 145 insertions(+), 117 deletions(-) diff --git a/compressible_test.go b/compressible_test.go index 09eb50b95b..16817c6fed 100644 --- a/compressible_test.go +++ b/compressible_test.go @@ -271,3 +271,32 @@ func BenchmarkCompressAllocations(b *testing.B) { }) } } + +func BenchmarkCompressAllocationsSingle(b *testing.B) { + payload := []byte(strings.Repeat("Tiny payload", 20)) + const level = 2 + b.Run("flate", func(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + w, err := flate.NewWriter(ioutil.Discard, level) + if err != nil { + b.Fatal(err) + } + w.Write(payload) + w.Close() + } + }) + b.Run("gzip", func(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + w, err := gzip.NewWriterLevel(ioutil.Discard, level) + if err != nil { + b.Fatal(err) + } + w.Write(payload) + w.Close() + } + }) +} diff --git a/flate/deflate.go b/flate/deflate.go index 3f4d76b6a4..2b101d26b2 100644 --- a/flate/deflate.go +++ b/flate/deflate.go @@ -48,6 +48,8 @@ const ( maxHashOffset = 1 << 24 skipNever = math.MaxInt32 + + debugDeflate = false ) type compressionLevel struct { @@ -365,7 +367,7 @@ func (d *compressor) deflateLazy() { // Sanity enables additional runtime tests. // It's intended to be used during development // to supplement the currently ad-hoc unit tests. - const sanity = false + const sanity = debugDeflate if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { return diff --git a/flate/fast_encoder.go b/flate/fast_encoder.go index 3d2fdcd77a..6d4c1e98bc 100644 --- a/flate/fast_encoder.go +++ b/flate/fast_encoder.go @@ -35,16 +35,16 @@ func newFastEnc(level int) fastEnc { } const ( - tableBits = 16 // Bits used in the table + tableBits = 15 // Bits used in the table tableSize = 1 << tableBits // Size of the table tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. baseMatchOffset = 1 // The smallest match offset baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 maxMatchOffset = 1 << 15 // The largest match offset - bTableBits = 18 // Bits used in the big tables + bTableBits = 17 // Bits used in the big tables bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxStoreBlockSize * 20 // Size to preallocate for history. + allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history. bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. ) @@ -92,7 +92,6 @@ func hash(u uint32) uint32 { } type tableEntry struct { - val uint32 offset int32 } diff --git a/flate/level1.go b/flate/level1.go index 102fc74c79..1e5eea3968 100644 --- a/flate/level1.go +++ b/flate/level1.go @@ -16,7 +16,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -81,12 +81,12 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { } now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} nextHash = hash(uint32(now)) offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } @@ -96,11 +96,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { nextS++ candidate = e.table[nextHash] now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } cv = uint32(now) @@ -139,7 +139,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(s+l+4) < len(src) { cv := load3232(src, s) - e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv} + e.table[hash(cv)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -153,14 +153,14 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { x := load6432(src, s-2) o := e.cur + s - 2 prevHash := hash(uint32(x)) - e.table[prevHash] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHash] = tableEntry{offset: o} x >>= 16 currHash := hash(uint32(x)) candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)} + e.table[currHash] = tableEntry{offset: o + 2} offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x) != candidate.val { + if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { cv = uint32(x >> 8) s++ break diff --git a/flate/level2.go b/flate/level2.go index dc6b1d3140..5b986a1944 100644 --- a/flate/level2.go +++ b/flate/level2.go @@ -18,7 +18,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -83,12 +83,12 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { } candidate = e.table[nextHash] now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} nextHash = hash4u(uint32(now), bTableBits) offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } @@ -98,10 +98,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { nextS++ candidate = e.table[nextHash] now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { break } cv = uint32(now) @@ -148,7 +148,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(s+l+4) < len(src) { cv := load3232(src, s) - e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv} + e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -157,15 +157,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { for i := s - l + 2; i < s-5; i += 7 { x := load6432(src, int32(i)) nextHash := hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i} // Skip one x >>= 16 nextHash = hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i + 2} // Skip one x >>= 16 nextHash = hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i + 4} } // We could immediately start working at s now, but to improve @@ -178,14 +178,14 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { o := e.cur + s - 2 prevHash := hash4u(uint32(x), bTableBits) prevHash2 := hash4u(uint32(x>>8), bTableBits) - e.table[prevHash] = tableEntry{offset: o, val: uint32(x)} - e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)} + e.table[prevHash] = tableEntry{offset: o} + e.table[prevHash2] = tableEntry{offset: o + 1} currHash := hash4u(uint32(x>>16), bTableBits) candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)} + e.table[currHash] = tableEntry{offset: o + 2} offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x>>16) != candidate.val { + if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { cv = uint32(x >> 24) s++ break diff --git a/flate/level3.go b/flate/level3.go index 1a3ff9b6b7..c22b4244a5 100644 --- a/flate/level3.go +++ b/flate/level3.go @@ -15,7 +15,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -81,22 +81,26 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { } candidates := e.table[nextHash] now := load3232(src, nextS) - e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}} + + // Safe offset distance until s + 4... + minOffset := e.cur + s - (maxMatchOffset - 4) + e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} // Check both candidates candidate = candidates.Cur - offset := s - (candidate.offset - e.cur) - if cv == candidate.val { - if offset > maxMatchOffset { - cv = now - // Previous will also be invalid, we have nothing. - continue - } - o2 := s - (candidates.Prev.offset - e.cur) - if cv != candidates.Prev.val || o2 > maxMatchOffset { + if candidate.offset < minOffset { + cv = now + // Previous will also be invalid, we have nothing. + continue + } + + if cv == load3232(src, candidate.offset-e.cur) { + if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) { break } // Both match and are valid, pick longest. + offset := s - (candidate.offset - e.cur) + o2 := s - (candidates.Prev.offset - e.cur) l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) if l2 > l1 { candidate = candidates.Prev @@ -106,11 +110,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { // We only check if value mismatches. // Offset will always be invalid in other cases. candidate = candidates.Prev - if cv == candidate.val { - offset := s - (candidate.offset - e.cur) - if offset <= maxMatchOffset { - break - } + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + break } } cv = now @@ -158,7 +159,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { nextHash := hash(cv) e.table[nextHash] = tableEntryPrev{ Prev: e.table[nextHash].Cur, - Cur: tableEntry{offset: e.cur + t, val: cv}, + Cur: tableEntry{offset: e.cur + t}, } } goto emitRemainder @@ -170,21 +171,21 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { prevHash := hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 3}, } x >>= 8 prevHash = hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 2}, } x >>= 8 prevHash = hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 1}, } x >>= 8 currHash := hash(uint32(x)) @@ -192,21 +193,18 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { cv = uint32(x) e.table[currHash] = tableEntryPrev{ Prev: candidates.Cur, - Cur: tableEntry{offset: s + e.cur, val: cv}, + Cur: tableEntry{offset: s + e.cur}, } // Check both candidates candidate = candidates.Cur - if cv == candidate.val { - offset := s - (candidate.offset - e.cur) - if offset <= maxMatchOffset { - continue - } - } else { + minOffset := e.cur + s - (maxMatchOffset - 4) + + if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) { // We only check if value mismatches. // Offset will always be invalid in other cases. candidate = candidates.Prev - if cv == candidate.val { + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { offset := s - (candidate.offset - e.cur) if offset <= maxMatchOffset { continue diff --git a/flate/level4.go b/flate/level4.go index f3ecc9c4d5..e62f0c02b1 100644 --- a/flate/level4.go +++ b/flate/level4.go @@ -13,7 +13,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } // Protect against e.cur wraparound. @@ -92,24 +92,24 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry e.bTable[nextHashL] = entry t = lCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) { // We got a long match. Use that. break } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... lCandidate = e.bTable[hash7(next, tableBits)] // If the next long is a candidate, check if we should use that instead... lOff := nextS - (lCandidate.offset - e.cur) - if lOff < maxMatchOffset && lCandidate.val == uint32(next) { + if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) { l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) if l2 > l1 { s = nextS @@ -137,7 +137,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { if nextEmit < s { emitLiteral(dst, src[nextEmit:s]) } - if false { + if debugDeflate { if t >= s { panic("s-t") } @@ -160,8 +160,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(s+8) < len(src) { cv := load6432(src, s) - e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)} - e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur} + e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -171,20 +171,20 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { i := nextS if i < s-1 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 i += 3 for ; i < s-1; i += 3 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 } } } @@ -195,8 +195,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { o := e.cur + s - 1 prevHashS := hash4x64(x, tableBits) prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} - e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHashS] = tableEntry{offset: o} + e.bTable[prevHashL] = tableEntry{offset: o} cv = x >> 8 } diff --git a/flate/level5.go b/flate/level5.go index 4e39168250..d513f1ffd3 100644 --- a/flate/level5.go +++ b/flate/level5.go @@ -13,7 +13,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -100,7 +100,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry eLong := &e.bTable[nextHashL] eLong.Cur, eLong.Prev = entry, eLong.Cur @@ -110,14 +110,14 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { t = lCandidate.Cur.offset - e.cur if s-t < maxMatchOffset { - if uint32(cv) == lCandidate.Cur.val { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { l = e.matchlen(s+4, t+4, src) + 4 ml1 := e.matchlen(s+4, t2+4, src) + 4 if ml1 > l { @@ -129,30 +129,30 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { break } t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur break } } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... l = e.matchlen(s+4, t+4, src) + 4 lCandidate = e.bTable[nextHashL] // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // If the next long is a candidate, use that... t2 := lCandidate.Cur.offset - e.cur if nextS-t2 < maxMatchOffset { - if lCandidate.Cur.val == uint32(next) { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -163,7 +163,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { } // If the previous long is a candidate, use that... t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -197,7 +197,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { if nextEmit < s { emitLiteral(dst, src[nextEmit:s]) } - if false { + if debugDeflate { if t >= s { panic(fmt.Sprintln("s-t", s, t)) } @@ -226,31 +226,31 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { i := s - l + 1 if i < s-1 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} + t := tableEntry{offset: i + e.cur} e.table[hash4x64(cv, tableBits)] = t eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur // Do an long at i+1 cv >>= 8 - t = tableEntry{offset: t.offset + 1, val: uint32(cv)} + t = tableEntry{offset: t.offset + 1} eLong = &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur // We only have enough bits for a short entry at i+2 cv >>= 8 - t = tableEntry{offset: t.offset + 1, val: uint32(cv)} + t = tableEntry{offset: t.offset + 1} e.table[hash4x64(cv, tableBits)] = t // Skip one - otherwise we risk hitting 's' i += 4 for ; i < s-1; i += hashEvery { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 } } } @@ -261,9 +261,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { o := e.cur + s - 1 prevHashS := hash4x64(x, tableBits) prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHashS] = tableEntry{offset: o} eLong := &e.bTable[prevHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur cv = x >> 8 } diff --git a/flate/level6.go b/flate/level6.go index 00a3119776..a52c80ea45 100644 --- a/flate/level6.go +++ b/flate/level6.go @@ -13,7 +13,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -101,7 +101,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry eLong := &e.bTable[nextHashL] eLong.Cur, eLong.Prev = entry, eLong.Cur @@ -112,17 +112,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { t = lCandidate.Cur.offset - e.cur if s-t < maxMatchOffset { - if uint32(cv) == lCandidate.Cur.val { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { // Long candidate matches at least 4 bytes. // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // Check the previous long candidate as well. t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { l = e.matchlen(s+4, t+4, src) + 4 ml1 := e.matchlen(s+4, t2+4, src) + 4 if ml1 > l { @@ -135,17 +135,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { } // Current value did not match, but check if previous long value does. t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur break } } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... l = e.matchlen(s+4, t+4, src) + 4 @@ -153,9 +153,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { lCandidate = e.bTable[nextHashL] // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // Check repeat at s + repOff const repOff = 1 @@ -174,7 +174,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // If the next long is a candidate, use that... t2 = lCandidate.Cur.offset - e.cur if nextS-t2 < maxMatchOffset { - if lCandidate.Cur.val == uint32(next) { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -185,7 +185,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { } // If the previous long is a candidate, use that... t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -244,9 +244,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // Index after match end. for i := nextS + 1; i < int32(len(src))-8; i += 2 { cv := load6432(src, i) - e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)} + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur} eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur } goto emitRemainder } @@ -255,8 +255,8 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { if true { for i := nextS + 1; i < s-1; i += 2 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} eLong := &e.bTable[hash7(cv, tableBits)] eLong2 := &e.bTable[hash7(cv>>8, tableBits)] e.table[hash4x64(cv, tableBits)] = t diff --git a/flate/token.go b/flate/token.go index 099c0ddbc4..f9abf606d6 100644 --- a/flate/token.go +++ b/flate/token.go @@ -262,7 +262,7 @@ func (t *tokens) EstimatedBits() int { // AddMatch adds a match to the tokens. // This function is very sensitive to inlining and right on the border. func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { - if debugDecode { + if debugDeflate { if xlength >= maxMatchLength+baseMatchLength { panic(fmt.Errorf("invalid length: %v", xlength)) } @@ -281,7 +281,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { // AddMatchLong adds a match to the tokens, potentially longer than max match length. // Length should NOT have the base subtracted, only offset should. func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { - if debugDecode { + if debugDeflate { if xoffset >= maxMatchOffset+baseMatchOffset { panic(fmt.Errorf("invalid offset: %v", xoffset)) }