Skip to content

Commit b15768a

Browse files
committed
zstd: Write table clearing in a way that the compiler recognizes
Benchmark results on amd64 below. These do not take into account klauspost#701. They're for Go 1.19; Go 1.20 produces slightly better asm for the old code, but still produces terrible asm on 32-bit platforms. See also golang/go#56954.

name                                 old speed      new speed      delta
Encoder_EncodeAllXML-8               283MB/s ± 1%   284MB/s ± 0%     ~     (p=0.026 n=30+20)
Encoder_EncodeAllSimple/fastest-8    111MB/s ± 0%   111MB/s ± 1%     ~     (p=0.011 n=28+20)
Encoder_EncodeAllSimple/default-8    78.4MB/s ± 1%  78.3MB/s ± 1%    ~     (p=0.572 n=30+19)
Encoder_EncodeAllSimple/better-8     65.9MB/s ± 1%  66.2MB/s ± 1%   +0.53% (p=0.009 n=30+20)
Encoder_EncodeAllSimple/best-8       11.1MB/s ± 1%  11.6MB/s ± 3%   +4.42% (p=0.000 n=27+28)
Encoder_EncodeAllSimple4K/fastest-8  911MB/s ± 1%   914MB/s ± 1%    +0.31% (p=0.004 n=29+20)
Encoder_EncodeAllSimple4K/default-8  73.1MB/s ± 1%  73.6MB/s ± 1%   +0.67% (p=0.000 n=29+20)
Encoder_EncodeAllSimple4K/better-8   60.5MB/s ± 1%  62.7MB/s ± 1%   +3.64% (p=0.000 n=29+17)
Encoder_EncodeAllSimple4K/best-8     8.62MB/s ± 3%  10.11MB/s ± 1% +17.24% (p=0.000 n=30+27)
Encoder_EncodeAllHTML-8              133MB/s ± 1%   133MB/s ± 1%     ~     (p=0.101 n=30+19)
Encoder_EncodeAllTwain-8             84.8MB/s ± 1%  86.2MB/s ± 3%   +1.63% (p=0.000 n=24+20)
Encoder_EncodeAllPi-8                62.6MB/s ± 1%  62.7MB/s ± 0%    ~     (p=0.102 n=30+20)
Random4KEncodeAllFastest-8           2.50GB/s ± 1%  2.50GB/s ± 1%    ~     (p=0.449 n=29+20)
Random10MBEncodeAllFastest-8         2.39GB/s ± 2%  2.52GB/s ± 6%   +5.23% (p=0.000 n=27+20)

name                                 old alloc/op   new alloc/op   delta
Encoder_EncodeAllXML-8               0.00B          0.00B            ~     (all equal)
Encoder_EncodeAllSimple/fastest-8    2.73B ±27%     3.00B ± 0%       ~     (p=0.018 n=30+18)
Encoder_EncodeAllSimple/default-8    4.00B ± 0%     4.00B ± 0%       ~     (all equal)
Encoder_EncodeAllSimple/better-8     5.00B ± 0%     5.00B ± 0%       ~     (all equal)
Encoder_EncodeAllSimple/best-8       19.5B ± 3%     19.0B ± 0%      -2.40% (p=0.000 n=30+24)
Encoder_EncodeAllSimple4K/fastest-8  0.00B          0.00B            ~     (all equal)
Encoder_EncodeAllSimple4K/default-8  0.00B          0.00B            ~     (all equal)
Encoder_EncodeAllSimple4K/better-8   0.00B          0.00B            ~     (all equal)
Encoder_EncodeAllSimple4K/best-8     2.00B ± 0%     1.43B ±40%     -28.33% (p=0.000 n=30+30)
Encoder_EncodeAllHTML-8              2.37B ±27%     2.25B ±33%       ~     (p=0.398 n=30+20)
Encoder_EncodeAllTwain-8             0.00B          0.00B            ~     (all equal)
Encoder_EncodeAllPi-8                12.4B ± 5%     12.2B ± 6%       ~     (p=0.283 n=30+20)
Random4KEncodeAllFastest-8           0.00B          0.00B            ~     (all equal)
Random10MBEncodeAllFastest-8         31.9kB ± 2%    30.5kB ± 9%     -4.27% (p=0.002 n=28+20)
1 parent 6efddf2 commit b15768a

File tree

4 files changed

+7
-21
lines changed

4 files changed

+7
-21
lines changed

zstd/enc_best.go

+2-6
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,8 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
8787
// Protect against e.cur wraparound.
8888
for e.cur >= bufferReset {
8989
if len(e.hist) == 0 {
90-
for i := range e.table[:] {
91-
e.table[i] = prevEntry{}
92-
}
93-
for i := range e.longTable[:] {
94-
e.longTable[i] = prevEntry{}
95-
}
90+
e.table = [bestShortTableSize]prevEntry{}
91+
e.longTable = [bestLongTableSize]prevEntry{}
9692
e.cur = e.maxMatchOff
9793
break
9894
}

zstd/enc_better.go

+2-6
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,8 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
6464
// Protect against e.cur wraparound.
6565
for e.cur >= bufferReset {
6666
if len(e.hist) == 0 {
67-
for i := range e.table[:] {
68-
e.table[i] = tableEntry{}
69-
}
70-
for i := range e.longTable[:] {
71-
e.longTable[i] = prevEntry{}
72-
}
67+
e.table = [betterShortTableSize]tableEntry{}
68+
e.longTable = [betterLongTableSize]prevEntry{}
7369
e.cur = e.maxMatchOff
7470
break
7571
}

zstd/enc_dfast.go

+2-6
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,8 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
4646
// Protect against e.cur wraparound.
4747
for e.cur >= bufferReset {
4848
if len(e.hist) == 0 {
49-
for i := range e.table[:] {
50-
e.table[i] = tableEntry{}
51-
}
52-
for i := range e.longTable[:] {
53-
e.longTable[i] = tableEntry{}
54-
}
49+
e.table = [dFastShortTableSize]tableEntry{}
50+
e.longTable = [dFastLongTableSize]tableEntry{}
5551
e.cur = e.maxMatchOff
5652
break
5753
}

zstd/enc_fast.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -557,9 +557,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
557557
// Protect against e.cur wraparound.
558558
for e.cur >= bufferReset {
559559
if len(e.hist) == 0 {
560-
for i := range e.table[:] {
561-
e.table[i] = tableEntry{}
562-
}
560+
e.table = [tableSize]tableEntry{}
563561
e.cur = e.maxMatchOff
564562
break
565563
}

0 commit comments

Comments
 (0)