From 5512113b67f8ecc631b27c5f8a144122c2dbcf6b Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Sat, 19 Oct 2024 22:38:25 +0300 Subject: [PATCH] Use our own Discard for testing Turns out that io.Discard is implementing ReadFrom using a small buffer (8192), confusing our benchmarks. We use copyBuffer with a 1 MiB buffer, but io.Discard is using its own 8 KiB buffer to do a huge amount of tiny reads. These tiny reads are extremely slow for reading compressed clusters, since we have to read and decompress the same cluster multiple times. With this change qcow2 zlib performance is 4 times better - it is still slow, but better matches the real performance. Before: % go test -bench Read BenchmarkRead0p/qcow2-12 14 78238414 ns/op 3430.99 MB/s 1051160 B/op 39 allocs/op BenchmarkRead0p/qcow2_zlib-12 14 78577923 ns/op 3416.17 MB/s 1051733 B/op 39 allocs/op BenchmarkRead50p/qcow2-12 21 54889353 ns/op 4890.48 MB/s 1183231 B/op 45 allocs/op BenchmarkRead50p/qcow2_zlib-12 1 3466799292 ns/op 77.43 MB/s 736076536 B/op 178764 allocs/op BenchmarkRead100p/qcow2-12 38 30562127 ns/op 8783.27 MB/s 1182901 B/op 45 allocs/op BenchmarkRead100p/qcow2_zlib-12 1 6834526167 ns/op 39.28 MB/s 1471530256 B/op 357570 allocs/op After: % go test -bench Read BenchmarkRead0p/qcow2-12 14 77515735 ns/op 3462.98 MB/s 1050518 B/op 39 allocs/op BenchmarkRead0p/qcow2_zlib-12 14 77823402 ns/op 3449.29 MB/s 1050504 B/op 39 allocs/op BenchmarkRead50p/qcow2-12 24 48812158 ns/op 5499.36 MB/s 1181856 B/op 45 allocs/op BenchmarkRead50p/qcow2_zlib-12 2 899659187 ns/op 298.37 MB/s 184996316 B/op 43247 allocs/op BenchmarkRead100p/qcow2-12 61 19306020 ns/op 13904.24 MB/s 1181854 B/op 45 allocs/op BenchmarkRead100p/qcow2_zlib-12 1 1732168542 ns/op 154.97 MB/s 368850952 B/op 86460 allocs/op Signed-off-by: Nir Soffer --- qcow2reader_test.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/qcow2reader_test.go b/qcow2reader_test.go index a9916f4..b4e20a6 100644 --- a/qcow2reader_test.go +++ 
b/qcow2reader_test.go @@ -129,7 +129,7 @@ func benchmarkRead(b *testing.B, filename string) { defer img.Close() buf := make([]byte, 1*MiB) reader := io.NewSectionReader(img, 0, img.Size()) - n, err := io.CopyBuffer(io.Discard, reader, buf) + n, err := io.CopyBuffer(Discard, reader, buf) b.StopTimer() @@ -141,6 +141,17 @@ func benchmarkRead(b *testing.B, filename string) { } } +// We cannot use io.Discard since it implements ReadFrom using a small buffer +// size (8192), confusing our test results. Reads smaller than cluster size (64 +// KiB) are extremely inefficient with compressed clusters. +type discard struct{} + +func (discard) Write(p []byte) (int, error) { +	return len(p), nil +} + +var Discard = discard{} + func resetBenchmark(b *testing.B, size int64) { b.StopTimer() b.ResetTimer()