Skip to content

Commit

Permalink
zstd: Improve better compression (#364)
Browse files Browse the repository at this point in the history
Try to find a better match by searching for a long match
at the end of the current best match.

Before/after pairs. The speed comparison is not reliable, since different Go versions were used.

```
silesia.tar zskp    3   211947520   65177448    1899    106.44
silesia.tar zskp    3   211947520   64595893    2007    100.68

gob-stream  zskp    3   1911399616  185792019   9324    195.48
gob-stream  zskp    3   1911399616  175034659   9636    189.17

enwik9  zskp    3   1000000000  294540704   11725   81.34
enwik9  zskp    3   1000000000  292243069   12162   78.41

github-june-2days-2019.json zskp    3   6273951764  537511906   29252   204.54
github-june-2days-2019.json zskp    3   6273951764  524340691   34043   175.75

rawstudio-mint14.tar    zskp    3   8558382592  3224594213  71751   113.75
rawstudio-mint14.tar    zskp    3   8558382592  3158085214  77675   105.08

nyc-taxi-data-10M.csv   zskp    3   3325605752  538490114   25683   123.49
nyc-taxi-data-10M.csv   zskp    3   3325605752  530289687   25239   125.66
```
  • Loading branch information
klauspost authored Apr 23, 2021
1 parent 6546703 commit 9bb6b77
Showing 1 changed file with 35 additions and 2 deletions.
37 changes: 35 additions & 2 deletions zstd/enc_better.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,8 +412,41 @@ encodeLoop:
cv = load6432(src, s)
}

// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
// Try to find a better match by searching for a long match at the end of the current best match
if true && s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
}
}
}

// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
}
}
}
}
}
// A match has been found. Update recent offsets.
offset2 = offset1
offset1 = s - t

Expand Down

0 comments on commit 9bb6b77

Please sign in to comment.