Use our own Discard for testing

Turns out that io.Discard implements ReadFrom using a small buffer (8192 bytes), which confuses our benchmarks. We copy using io.CopyBuffer with a 1 MiB buffer, but io.Discard uses its own 8 KiB buffer and performs a huge number of tiny reads. These tiny reads are extremely slow when reading compressed clusters, since we have to read and decompress the same cluster multiple times.

With this change qcow2 zlib performance is 4 times better - it is still slow, but it better matches the real performance.

Before:

    % go test -bench Read
    BenchmarkRead0p/qcow2-12 14 78238414 ns/op 3430.99 MB/s 1051160 B/op 39 allocs/op
    BenchmarkRead0p/qcow2_zlib-12 14 78577923 ns/op 3416.17 MB/s 1051733 B/op 39 allocs/op
    BenchmarkRead50p/qcow2-12 21 54889353 ns/op 4890.48 MB/s 1183231 B/op 45 allocs/op
    BenchmarkRead50p/qcow2_zlib-12 1 3466799292 ns/op 77.43 MB/s 736076536 B/op 178764 allocs/op
    BenchmarkRead100p/qcow2-12 38 30562127 ns/op 8783.27 MB/s 1182901 B/op 45 allocs/op
    BenchmarkRead100p/qcow2_zlib-12 1 6834526167 ns/op 39.28 MB/s 1471530256 B/op 357570 allocs/op

After:

    % go test -bench Read
    BenchmarkRead0p/qcow2-12 14 77515735 ns/op 3462.98 MB/s 1050518 B/op 39 allocs/op
    BenchmarkRead0p/qcow2_zlib-12 14 77823402 ns/op 3449.29 MB/s 1050504 B/op 39 allocs/op
    BenchmarkRead50p/qcow2-12 24 48812158 ns/op 5499.36 MB/s 1181856 B/op 45 allocs/op
    BenchmarkRead50p/qcow2_zlib-12 2 899659187 ns/op 298.37 MB/s 184996316 B/op 43247 allocs/op
    BenchmarkRead100p/qcow2-12 61 19306020 ns/op 13904.24 MB/s 1181854 B/op 45 allocs/op
    BenchmarkRead100p/qcow2_zlib-12 1 1732168542 ns/op 154.97 MB/s 368850952 B/op 86460 allocs/op

Signed-off-by: Nir Soffer <[email protected]>
lima-vm · Oct 19, 2024 · 5512113 · 5512113
1 parent 5641962
commit 5512113
Showing 1 changed file with 12 additions and 1 deletion.
diff --git a/qcow2reader_test.go b/qcow2reader_test.go
@@ -129,7 +129,7 @@ func benchmarkRead(b *testing.B, filename string) {
 	defer img.Close()
 	buf := make([]byte, 1*MiB)
 	reader := io.NewSectionReader(img, 0, img.Size())
-	n, err := io.CopyBuffer(io.Discard, reader, buf)
+	n, err := io.CopyBuffer(Discard, reader, buf)
 
 	b.StopTimer()
 
@@ -141,6 +141,17 @@ func benchmarkRead(b *testing.B, filename string) {
 	}
 }
 
+// We cannot use io.Discard since it implements ReadFrom using a small buffer
+// (8192 bytes), confusing our test results. Reads smaller than the cluster
+// size (64 KiB) are extremely inefficient with compressed clusters.
+type discard struct{}
+
+func (discard) Write(p []byte) (int, error) {
+	return len(p), nil
+}
+
+var Discard = discard{}
+
 func resetBenchmark(b *testing.B, size int64) {
 	b.StopTimer()
 	b.ResetTimer()