Skip to content

Commit

Permalink
update benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangli20 committed Aug 1, 2024
1 parent ad856f8 commit c4fce1f
Show file tree
Hide file tree
Showing 5 changed files with 299 additions and 244 deletions.
47 changes: 31 additions & 16 deletions .github/workflows/enwik8-benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,35 @@ jobs:
- name: Install zstd
run: sudo apt-get -y update && sudo apt-get -y install zstd

- name: Install brotli
run: sudo apt-get -y update && sudo apt-get -y install brotli

- name: Preparing enwik8
run: xz -d < test/enwik8.xz >enwik8

- name: Benchmark with enwik8
run: |
(time gzip < enwik8 >enwik8.gz) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >gzip.enc_time
(time gzip -9 < enwik8 >enwik8.gz) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >gzip.enc_time
(time gzip -d < enwik8.gz >enwik8.ungz) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >gzip.dec_time
(time bzip2 < enwik8 >enwik8.bz2) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >bzip2.enc_time
(time bzip2 -9 < enwik8 >enwik8.bz2) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >bzip2.enc_time
(time bzip2 -d < enwik8.bz2 >enwik8.unbz2) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >bzip2.dec_time
(time zstd -12 < enwik8 >enwik8.zstd12) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >zstd12.enc_time
(time zstd -d < enwik8.zstd12 >enwik8.unzstd12) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >zstd12.dec_time
(time zstd -10 < enwik8 >enwik8.zstd10) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >zstd10.enc_time
(time zstd -d < enwik8.zstd10 >enwik8.unzstd10) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >zstd10.dec_time
(time zstd -15 < enwik8 >enwik8.zstd15) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >zstd15.enc_time
(time zstd -d < enwik8.zstd15 >enwik8.unzstd15) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >zstd15.dec_time
(time zstd -19 < enwik8 >enwik8.zstd19) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >zstd19.enc_time
(time zstd -d < enwik8.zstd19 >enwik8.unzstd19) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >zstd19.dec_time
(time brotli -6 < enwik8 >enwik8.br6) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >br6.enc_time
(time brotli -d < enwik8.br6 >enwik8.unbr6) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >br6.dec_time
(time brotli -9 < enwik8 >enwik8.br9) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >br9.enc_time
(time brotli -d < enwik8.br9 >enwik8.unbr9) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >br9.dec_time
(time xz -6 < enwik8 >enwik8.xz) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >xz.enc_time
(time xz -d < enwik8.xz >enwik8.unxz) 2>&1 | grep -Poi 'real\s+\K[0-9.ms]+' >xz.dec_time
Expand All @@ -59,15 +71,18 @@ jobs:
- name: "==================== Benchmark Result ===================="
run: |
echo ""
printf "+----------+------------------+--------------+--------------+\n"
printf "| | %16s | %12s | %12s |\n" "Compressed Size" "Encode time" "Decode time"
printf "|----------+------------------+--------------+--------------|\n"
printf "| gzip | %16s | %12s | %12s |\n" "$(wc -c < enwik8.gz | grep -Poi '\d+')" "$(cat < gzip.enc_time)" "$(cat < gzip.dec_time)"
printf "| bzip2 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.bz2 | grep -Poi '\d+')" "$(cat < bzip2.enc_time)" "$(cat < bzip2.dec_time)"
printf "| zstd -12 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.zstd12 | grep -Poi '\d+')" "$(cat < zstd12.enc_time)" "$(cat < zstd12.dec_time)"
printf "| orz -l0 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.orz0 | grep -Poi '\d+')" "$(cat < orz0.enc_time)" "$(cat < orz0.dec_time)"
printf "| orz -l1 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.orz1 | grep -Poi '\d+')" "$(cat < orz1.enc_time)" "$(cat < orz1.dec_time)"
printf "| orz -l2 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.orz2 | grep -Poi '\d+')" "$(cat < orz2.enc_time)" "$(cat < orz2.dec_time)"
printf "| zstd -19 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.zstd19 | grep -Poi '\d+')" "$(cat < zstd19.enc_time)" "$(cat < zstd19.dec_time)"
printf "| xz -6 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.xz | grep -Poi '\d+')" "$(cat < xz.enc_time)" "$(cat < xz.dec_time)"
printf "+----------+------------------+--------------+--------------+\n"
printf "| | %16s | %12s | %12s |\n" "Compressed Size" "Encode time" "Decode time"
printf "|-----------|------------------|--------------|--------------|\n"
(
printf "| gzip -9 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.gz | grep -Poi '\d+')" "$(cat < gzip.enc_time)" "$(cat < gzip.dec_time)"
printf "| bzip2 -9 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.bz2 | grep -Poi '\d+')" "$(cat < bzip2.enc_time)" "$(cat < bzip2.dec_time)"
printf "| brotli -6 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.br6 | grep -Poi '\d+')" "$(cat < br6.enc_time)" "$(cat < br6.dec_time)"
printf "| brotli -9 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.br9 | grep -Poi '\d+')" "$(cat < br9.enc_time)" "$(cat < br9.dec_time)"
printf "| zstd -10 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.zstd10 | grep -Poi '\d+')" "$(cat < zstd10.enc_time)" "$(cat < zstd10.dec_time)"
printf "| zstd -15 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.zstd15 | grep -Poi '\d+')" "$(cat < zstd15.enc_time)" "$(cat < zstd15.dec_time)"
printf "| zstd -19 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.zstd19 | grep -Poi '\d+')" "$(cat < zstd19.enc_time)" "$(cat < zstd19.dec_time)"
printf "| orz -l0 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.orz0 | grep -Poi '\d+')" "$(cat < orz0.enc_time)" "$(cat < orz0.dec_time)"
printf "| orz -l1 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.orz1 | grep -Poi '\d+')" "$(cat < orz1.enc_time)" "$(cat < orz1.dec_time)"
printf "| orz -l2 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.orz2 | grep -Poi '\d+')" "$(cat < orz2.enc_time)" "$(cat < orz2.dec_time)"
printf "| xz -6 | %16s | %12s | %12s |\n" "$(wc -c < enwik8.xz | grep -Poi '\d+')" "$(cat < xz.enc_time)" "$(cat < xz.dec_time)"
) | sort -n -k4
43 changes: 13 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,33 +38,16 @@ benchmark for 100MB of Large Text Compression Benchmark (enwik8, see http://matt

| name |compressed size|encode time|decode time|
|-----------|---------------|-----------|-----------|
| xz -6 | 26,375,764 | 71.601s | 1.563s |
|**orz -l2**| 27,100,084 | 8.151s | 1.299s |
|**orz -l1**| 27,381,156 | 6.812s | 1.314s |
| zstd -19 | 27,659,082 | 52.844s | 0.244s |
|**orz -l0**| 28,025,726 | 5.630s | 1.327s |
| bzip2 -9 | 29,008,758 | 8.385s | 3.922s |
| brotli -9 | 29,685,672 | 35.458s | 0.340s |
| brotli -8 | 30,326,580 | 20.501s | 0.311s |
| zstd -15 | 30,328,568 | 23.030s | 0.195s |
| brotli -7 | 31,057,759 | 12.267s | 0.307s |
| zstd -11 | 31,230,229 | 8.992s | 0.206s |
| lzfse | 36,157,828 | 1.976s | 0.176s |
| gzip -6 | 36,518,322 | 4.948s | 0.672s |

benchmark for 400MB of text data of Global Data Compression Competition (TS40.txt, see https://globalcompetition.compression.ru/#leaderboards):
| name |compressed size|encode time|decode time|
|-----------|---------------|-----------|-----------|
| xz -6 | 108,677,096 | 335.738s | 5.887s |
| bzip2 -9 | 109,502,210 | 35.331s | 15.986s |
|**orz -l2**| 111,844,429 | 31.955s | 4.890s |
| zstd -19 | 112,679,835 | 252.155s | 1.050s |
|**orz -l1**| 113,065,821 | 26.168s | 4.799s |
|**orz -l0**| 116,003,142 | 20.172s | 4.785s |
| zstd -15 | 123,100,586 | 110.805s | 0.878s |
| brotli -9 | 124,453,389 | 144.100s | 1.422s |
| brotli -8 | 126,791,079 | 78.620s | 1.281s |
| zstd -11 | 127,940,149 | 40.962s | 0.827s |
| brotli -7 | 129,425,945 | 45.338s | 1.245s |
| gzip -6 | 146,656,915 | 25.237s | 2.662s |
| lzfse | 147,579,002 | 8.220s | 0.832s |
| xz -6 | 26,665,156 | 69.815s | 1.309s |
|**orz -l2**| 26,893,684 | 8.245s | 1.414s |
| zstd -19 | 26,942,199 | 62.931s | 0.239s |
|**orz -l1**| 27,220,056 | 6.714s | 1.393s |
|**orz -l0**| 27,896,572 | 5.209s | 1.405s |
| bzip2 -9 | 29,008,758 | 7.417s | 3.538s |
| zstd -15 | 29,544,237 | 29.860s | 0.196s |
| brotli -9 | 29,685,672 | 36.147s | 0.285s |
| brotli -8 | 30,326,580 | 17.989s | 0.271s |
| zstd -10 | 30,697,144 | 4.205s | 0.192s |
| brotli -7 | 31,057,759 | 11.730s | 0.267s |
| lzfse | 36,157,828 | 1.762s | 0.179s |
| gzip -6 | 36,548,933 | 4.461s | 0.357s |
Loading

0 comments on commit c4fce1f

Please sign in to comment.