From 5512113b67f8ecc631b27c5f8a144122c2dbcf6b Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Sat, 19 Oct 2024 22:38:25 +0300 Subject: [PATCH] Use our own Discard for testing Turns out that io.Discard is implementing ReadFrom using a small buffer (8192), confusing our benchmarks. We use copyBuffer with a 1 MiB buffer, but io.Discard is using its own 8 KiB buffer to do a huge amount of tiny reads. These tiny reads are extremely slow for reading compressed clusters, since we have to read and decompress the same cluster multiple times. With this change qcow2 zlib performance is 4 times better - it is still slow, but better matches the real performance. Before: % go test -bench Read BenchmarkRead0p/qcow2-12 14 78238414 ns/op 3430.99 MB/s 1051160 B/op 39 allocs/op BenchmarkRead0p/qcow2_zlib-12 14 78577923 ns/op 3416.17 MB/s 1051733 B/op 39 allocs/op BenchmarkRead50p/qcow2-12 21 54889353 ns/op 4890.48 MB/s 1183231 B/op 45 allocs/op BenchmarkRead50p/qcow2_zlib-12 1 3466799292 ns/op 77.43 MB/s 736076536 B/op 178764 allocs/op BenchmarkRead100p/qcow2-12 38 30562127 ns/op 8783.27 MB/s 1182901 B/op 45 allocs/op BenchmarkRead100p/qcow2_zlib-12 1 6834526167 ns/op 39.28 MB/s 1471530256 B/op 357570 allocs/op After: % go test -bench Read BenchmarkRead0p/qcow2-12 14 77515735 ns/op 3462.98 MB/s 1050518 B/op 39 allocs/op BenchmarkRead0p/qcow2_zlib-12 14 77823402 ns/op 3449.29 MB/s 1050504 B/op 39 allocs/op BenchmarkRead50p/qcow2-12 24 48812158 ns/op 5499.36 MB/s 1181856 B/op 45 allocs/op BenchmarkRead50p/qcow2_zlib-12 2 899659187 ns/op 298.37 MB/s 184996316 B/op 43247 allocs/op BenchmarkRead100p/qcow2-12 61 19306020 ns/op 13904.24 MB/s 1181854 B/op 45 allocs/op BenchmarkRead100p/qcow2_zlib-12 1 1732168542 ns/op 154.97 MB/s 368850952 B/op 86460 allocs/op Signed-off-by: Nir Soffer --- qcow2reader_test.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/qcow2reader_test.go b/qcow2reader_test.go index a9916f4..b4e20a6 100644 --- a/qcow2reader_test.go +++ 
b/qcow2reader_test.go @@ -129,7 +129,7 @@ func benchmarkRead(b *testing.B, filename string) { defer img.Close() buf := make([]byte, 1*MiB) reader := io.NewSectionReader(img, 0, img.Size()) - n, err := io.CopyBuffer(io.Discard, reader, buf) + n, err := io.CopyBuffer(Discard, reader, buf) b.StopTimer() @@ -141,6 +141,17 @@ func benchmarkRead(b *testing.B, filename string) { } } +// We cannot use io.Discard since it implements ReadFrom using a small buffer +// size (8192), confusing our test results. Reads smaller than cluster size (64 +// KiB) are extremely inefficient with compressed clusters. +type discard struct{} + +func (discard) Write(p []byte) (int, error) { +	return len(p), nil +} + +var Discard = discard{} + func resetBenchmark(b *testing.B, size int64) { b.StopTimer() b.ResetTimer()