Skip to content

Commit

Permalink
zstd: Inline blockDec.checkCRC
Browse files Browse the repository at this point in the history
checkCRC is now a uint32 with a boolean indicating whether it needs
checking. This makes the comparison shorter and avoids some allocations.

Benchmark results on older i7:

name                                                  old speed      new speed      delta
Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8          458MB/s ± 0%   458MB/s ± 0%     ~     (p=0.139 n=9+8)
Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8        568MB/s ± 2%   566MB/s ± 4%     ~     (p=0.971 n=10+10)
Decoder_DecoderSmall/geo.protodata.zst/buffered-8     1.19GB/s ± 0%  1.20GB/s ± 1%     ~     (p=0.051 n=9+10)
Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8    832MB/s ± 2%   839MB/s ± 2%     ~     (p=0.156 n=10+9)
Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8       352MB/s ± 0%   352MB/s ± 0%     ~     (p=0.148 n=10+10)
Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8     534MB/s ± 7%   542MB/s ± 3%     ~     (p=0.315 n=10+10)
Decoder_DecoderSmall/lcet10.txt.zst/buffered-8         422MB/s ± 0%   422MB/s ± 0%     ~     (p=0.408 n=8+10)
Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8       606MB/s ± 9%   618MB/s ± 7%     ~     (p=0.393 n=10+10)
Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8       362MB/s ± 5%   367MB/s ± 0%   +1.44%  (p=0.026 n=9+10)
Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8     449MB/s ± 3%   463MB/s ± 4%   +3.28%  (p=0.003 n=10+9)
Decoder_DecoderSmall/alice29.txt.zst/buffered-8        361MB/s ± 0%   361MB/s ± 0%     ~     (p=0.888 n=8+9)
Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8      569MB/s ± 8%   577MB/s ± 2%     ~     (p=0.541 n=9+8)
Decoder_DecoderSmall/html_x_4.zst/buffered-8          2.46GB/s ± 1%  2.46GB/s ± 0%     ~     (p=0.720 n=10+9)
Decoder_DecoderSmall/html_x_4.zst/unbuffered-8        1.67GB/s ± 6%  1.65GB/s ± 4%     ~     (p=0.315 n=10+9)
Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8    3.87GB/s ± 0%  3.87GB/s ± 0%     ~     (p=0.497 n=10+9)
Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8  1.84GB/s ± 7%  1.83GB/s ± 2%     ~     (p=0.912 n=10+10)
Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8    8.63GB/s ± 1%  8.61GB/s ± 0%     ~     (p=0.063 n=10+10)
Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8  3.34GB/s ± 3%  3.32GB/s ± 5%     ~     (p=0.579 n=10+10)
Decoder_DecoderSmall/urls.10K.zst/buffered-8           586MB/s ± 1%   584MB/s ± 0%   -0.46%  (p=0.005 n=9+7)
Decoder_DecoderSmall/urls.10K.zst/unbuffered-8         877MB/s ± 2%   879MB/s ± 2%     ~     (p=0.796 n=10+10)
Decoder_DecoderSmall/html.zst/buffered-8               961MB/s ± 0%   963MB/s ± 0%   +0.19%  (p=0.004 n=9+9)
Decoder_DecoderSmall/html.zst/unbuffered-8             709MB/s ± 1%   715MB/s ± 3%     ~     (p=0.203 n=8+10)
Decoder_DecoderSmall/comp-data.bin.zst/buffered-8      407MB/s ± 0%   405MB/s ± 0%   -0.47%  (p=0.000 n=10+8)
Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8    152MB/s ± 3%   151MB/s ± 4%     ~     (p=0.853 n=10+10)

name                                                  old alloc/op   new alloc/op   delta
Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8            54.0B ± 0%     54.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8         4.57kB ± 3%    4.57kB ± 3%     ~     (p=0.985 n=10+10)
Decoder_DecoderSmall/geo.protodata.zst/buffered-8        49.0B ± 0%     49.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8     4.47kB ± 0%    4.47kB ± 1%     ~     (p=0.481 n=10+10)
Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8         68.6B ± 2%     68.0B ± 0%     ~     (p=0.173 n=10+9)
Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8      4.64kB ± 5%    4.62kB ± 4%     ~     (p=1.000 n=10+10)
Decoder_DecoderSmall/lcet10.txt.zst/buffered-8           63.0B ± 0%     63.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8        4.65kB ± 3%    4.56kB ± 3%   -1.94%  (p=0.014 n=10+10)
Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8         53.0B ± 0%     53.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8      4.49kB ± 1%    4.45kB ± 2%   -0.93%  (p=0.006 n=9+9)
Decoder_DecoderSmall/alice29.txt.zst/buffered-8          77.9B ± 1%     77.5B ± 1%     ~     (p=0.176 n=9+8)
Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8       4.52kB ± 2%    4.48kB ± 1%   -0.94%  (p=0.003 n=10+9)
Decoder_DecoderSmall/html_x_4.zst/buffered-8             53.0B ± 6%     56.0B ± 0%     ~     (p=0.059 n=10+8)
Decoder_DecoderSmall/html_x_4.zst/unbuffered-8          4.46kB ± 0%    4.48kB ± 2%     ~     (p=1.000 n=8+10)
Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8       48.0B ± 0%     48.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8    4.46kB ± 0%    4.43kB ± 0%   -0.72%  (p=0.000 n=10+10)
Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8       48.0B ± 0%     48.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8    4.07kB ± 0%    4.04kB ± 0%   -0.79%  (p=0.000 n=9+10)
Decoder_DecoderSmall/urls.10K.zst/buffered-8             66.0B ± 0%     66.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/urls.10K.zst/unbuffered-8          4.60kB ± 3%    4.53kB ± 5%     ~     (p=0.052 n=10+10)
Decoder_DecoderSmall/html.zst/buffered-8                 49.0B ± 0%     49.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/html.zst/unbuffered-8              4.47kB ± 0%    4.46kB ± 1%     ~     (p=0.165 n=8+10)
Decoder_DecoderSmall/comp-data.bin.zst/buffered-8        48.0B ± 0%     48.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8     4.44kB ± 0%    4.44kB ± 0%     ~     (p=0.171 n=10+10)

name                                                  old allocs/op  new allocs/op  delta
Decoder_DecoderSmall/kppkn.gtb.zst/buffered-8             1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-8           40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/geo.protodata.zst/buffered-8         1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/geo.protodata.zst/unbuffered-8       40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/plrabn12.txt.zst/buffered-8          1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-8        40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/lcet10.txt.zst/buffered-8            1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-8          40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/asyoulik.txt.zst/buffered-8          1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-8        40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/alice29.txt.zst/buffered-8           1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/alice29.txt.zst/unbuffered-8         40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/html_x_4.zst/buffered-8              1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/html_x_4.zst/unbuffered-8            40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-8        1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-8      40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-8        1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-8      32.0 ± 0%      24.0 ± 0%  -25.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/urls.10K.zst/buffered-8              1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/urls.10K.zst/unbuffered-8            40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/html.zst/buffered-8                  1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/html.zst/unbuffered-8                40.0 ± 0%      32.0 ± 0%  -20.00%  (p=0.000 n=10+10)
Decoder_DecoderSmall/comp-data.bin.zst/buffered-8         1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-8       32.0 ± 0%      32.0 ± 0%     ~     (all equal)

On a newer i5:

name                                                   old speed      new speed      delta
Decoder_DecoderSmall/kppkn.gtb.zst/buffered-16          730MB/s ± 2%   729MB/s ± 2%     ~     (p=0.971 n=10+10)
Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-16       1.16GB/s ± 5%  1.17GB/s ± 5%     ~     (p=0.720 n=10+9)
Decoder_DecoderSmall/geo.protodata.zst/buffered-16     2.45GB/s ± 2%  2.45GB/s ± 4%     ~     (p=0.853 n=10+10)
Decoder_DecoderSmall/geo.protodata.zst/unbuffered-16   2.47GB/s ± 1%  2.47GB/s ± 1%     ~     (p=0.631 n=10+10)
Decoder_DecoderSmall/plrabn12.txt.zst/buffered-16       520MB/s ± 1%   528MB/s ± 2%   +1.58%  (p=0.015 n=10+10)
Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-16    1.15GB/s ±12%  1.18GB/s ± 5%     ~     (p=0.393 n=10+10)
Decoder_DecoderSmall/lcet10.txt.zst/buffered-16         615MB/s ± 2%   627MB/s ± 2%   +1.80%  (p=0.001 n=10+10)
Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-16      1.19GB/s ± 4%  1.20GB/s ± 5%     ~     (p=0.684 n=10+10)
Decoder_DecoderSmall/asyoulik.txt.zst/buffered-16       565MB/s ± 2%   572MB/s ± 2%     ~     (p=0.075 n=10+10)
Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-16    1.10GB/s ±10%  1.11GB/s ± 9%     ~     (p=0.481 n=10+10)
Decoder_DecoderSmall/alice29.txt.zst/buffered-16        560MB/s ± 1%   567MB/s ± 2%   +1.22%  (p=0.008 n=10+9)
Decoder_DecoderSmall/alice29.txt.zst/unbuffered-16      928MB/s ± 3%   919MB/s ± 3%     ~     (p=0.218 n=10+10)
Decoder_DecoderSmall/html_x_4.zst/buffered-16          4.12GB/s ± 2%  4.19GB/s ± 2%   +1.75%  (p=0.008 n=9+10)
Decoder_DecoderSmall/html_x_4.zst/unbuffered-16        4.79GB/s ± 2%  4.77GB/s ± 2%     ~     (p=0.542 n=10+10)
Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-16    6.02GB/s ± 2%  5.96GB/s ± 2%     ~     (p=0.123 n=10+10)
Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-16  5.78GB/s ± 1%  5.81GB/s ± 1%     ~     (p=0.123 n=10+10)
Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-16    11.5GB/s ± 2%  11.3GB/s ± 2%   -1.77%  (p=0.005 n=10+10)
Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-16  9.83GB/s ± 1%  9.80GB/s ± 1%     ~     (p=0.353 n=10+10)
Decoder_DecoderSmall/urls.10K.zst/buffered-16           845MB/s ± 1%   845MB/s ± 2%     ~     (p=0.796 n=10+10)
Decoder_DecoderSmall/urls.10K.zst/unbuffered-16        1.46GB/s ± 2%  1.46GB/s ± 3%     ~     (p=0.971 n=10+10)
Decoder_DecoderSmall/html.zst/buffered-16              1.80GB/s ± 2%  1.84GB/s ± 2%   +2.11%  (p=0.006 n=9+8)
Decoder_DecoderSmall/html.zst/unbuffered-16            2.16GB/s ± 1%  2.18GB/s ± 1%   +0.97%  (p=0.019 n=10+10)
Decoder_DecoderSmall/comp-data.bin.zst/buffered-16      702MB/s ± 3%   708MB/s ± 1%     ~     (p=0.165 n=10+10)
Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-16    609MB/s ± 1%   608MB/s ± 1%     ~     (p=0.150 n=9+10)

name                                                   old alloc/op   new alloc/op   delta
Decoder_DecoderSmall/kppkn.gtb.zst/buffered-16            55.0B ± 0%     55.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/kppkn.gtb.zst/unbuffered-16         4.51kB ± 1%    4.47kB ± 1%   -0.93%  (p=0.001 n=10+9)
Decoder_DecoderSmall/geo.protodata.zst/buffered-16        49.0B ± 0%     49.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/geo.protodata.zst/unbuffered-16     4.48kB ± 1%    4.44kB ± 0%   -0.87%  (p=0.000 n=10+10)
Decoder_DecoderSmall/plrabn12.txt.zst/buffered-16         74.5B ± 1%     74.2B ± 2%     ~     (p=0.450 n=10+10)
Decoder_DecoderSmall/plrabn12.txt.zst/unbuffered-16      4.56kB ± 2%    4.53kB ± 2%     ~     (p=0.306 n=10+10)
Decoder_DecoderSmall/lcet10.txt.zst/buffered-16           67.5B ± 1%     67.0B ± 0%   -0.74%  (p=0.033 n=10+10)
Decoder_DecoderSmall/lcet10.txt.zst/unbuffered-16        4.55kB ± 1%    4.50kB ± 1%   -1.14%  (p=0.000 n=9+10)
Decoder_DecoderSmall/asyoulik.txt.zst/buffered-16         54.0B ± 0%     54.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/asyoulik.txt.zst/unbuffered-16      4.47kB ± 0%    4.44kB ± 0%   -0.66%  (p=0.000 n=9+10)
Decoder_DecoderSmall/alice29.txt.zst/buffered-16          74.6B ± 6%     75.3B ± 7%     ~     (p=0.883 n=10+10)
Decoder_DecoderSmall/alice29.txt.zst/unbuffered-16       4.48kB ± 0%    4.45kB ± 1%   -0.57%  (p=0.005 n=7+9)
Decoder_DecoderSmall/html_x_4.zst/buffered-16             54.0B ± 0%     52.4B ± 5%   -2.96%  (p=0.046 n=8+10)
Decoder_DecoderSmall/html_x_4.zst/unbuffered-16          4.48kB ± 0%    4.46kB ± 0%   -0.43%  (p=0.016 n=10+9)
Decoder_DecoderSmall/paper-100k.pdf.zst/buffered-16       48.0B ± 0%     48.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/paper-100k.pdf.zst/unbuffered-16    4.46kB ± 0%    4.43kB ± 0%   -0.69%  (p=0.000 n=10+10)
Decoder_DecoderSmall/fireworks.jpeg.zst/buffered-16       48.0B ± 0%     48.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/fireworks.jpeg.zst/unbuffered-16    4.07kB ± 0%    4.04kB ± 0%   -0.80%  (p=0.000 n=7+9)
Decoder_DecoderSmall/urls.10K.zst/buffered-16             71.4B ± 1%     71.6B ± 1%     ~     (p=0.656 n=10+10)
Decoder_DecoderSmall/urls.10K.zst/unbuffered-16          4.55kB ± 2%    4.53kB ± 2%     ~     (p=0.529 n=10+10)
Decoder_DecoderSmall/html.zst/buffered-16                 49.0B ± 0%     49.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/html.zst/unbuffered-16              4.48kB ± 0%    4.44kB ± 0%   -0.91%  (p=0.000 n=10+9)
Decoder_DecoderSmall/comp-data.bin.zst/buffered-16        48.0B ± 0%     48.0B ± 0%     ~     (all equal)
Decoder_DecoderSmall/comp-data.bin.zst/unbuffered-16     4.44kB ± 0%    4.44kB ± 0%     ~     (p=0.897 n=10+10)

allocs/op is the same as on the previous machine.
  • Loading branch information
greatroar committed Nov 24, 2022
1 parent eb034a2 commit 0ffe128
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 18 deletions.
5 changes: 3 additions & 2 deletions zstd/blockdec.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@ type blockDec struct {

err error

// Check against this crc
checkCRC []byte
// Check against this crc, if hasCRC is true.
checkCRC uint32
hasCRC bool

// Frame to use for singlethreaded decoding.
// Should not be used by the decoder itself since parent may be another frame.
Expand Down
33 changes: 17 additions & 16 deletions zstd/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package zstd

import (
"bytes"
"context"
"encoding/binary"
"io"
Expand Down Expand Up @@ -471,18 +470,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
}
}
}
if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
got := d.current.crc.Sum64()
var tmp [4]byte
binary.LittleEndian.PutUint32(tmp[:], uint32(got))
if !bytes.Equal(tmp[:], next.d.checkCRC) {
if next.err == nil && next.d != nil && next.d.hasCRC {
got := uint32(d.current.crc.Sum64())
if got != next.d.checkCRC {
if debugDecoder {
println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
}
d.current.err = ErrCRCMismatch
} else {
if debugDecoder {
println("CRC ok", tmp[:])
printf("CRC ok %08x\n", got)
}
}
}
Expand Down Expand Up @@ -922,18 +919,22 @@ decodeStream:
println("next block returned error:", err)
}
dec.err = err
dec.checkCRC = nil
dec.hasCRC = false
if dec.Last && frame.HasCheckSum && err == nil {
crc, err := frame.rawInput.readSmall(4)
if err != nil {
if len(crc) < 4 {
if err != nil {
err = io.ErrUnexpectedEOF

}
println("CRC missing?", err)
dec.err = err
}
var tmp [4]byte
copy(tmp[:], crc)
dec.checkCRC = tmp[:]
if debugDecoder {
println("found crc to check:", dec.checkCRC)
} else {
dec.checkCRC = binary.LittleEndian.Uint32(crc)
dec.hasCRC = true
if debugDecoder {
printf("found crc to check: %08x\n", dec.checkCRC)
}
}
}
err = dec.err
Expand Down

0 comments on commit 0ffe128

Please sign in to comment.