From 289d06da30e0e8cffd67e1ac9261083950b9d8d9 Mon Sep 17 00:00:00 2001 From: reus Date: Sat, 9 Mar 2024 14:24:46 +0800 Subject: [PATCH] optimize sql.HashOf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * pool *xxhash.Digest objects * use fmt.Fprintf to write to hash benchmark stats goos: linux goarch: amd64 pkg: github.com/dolthub/go-mysql-server/sql cpu: AMD Ryzen 9 7900 12-Core Processor │ b1 │ b2 │ │ sec/op │ sec/op vs base │ HashOf-24 79.65n ± 4% 70.86n ± 7% -11.03% (p=0.002 n=6) ParallelHashOf-24 10.47n ± 4% 11.85n ± 19% ~ (p=0.368 n=6) geomean 28.88n 28.98n +0.32% │ b1 │ b2 │ │ B/op │ B/op vs base │ HashOf-24 4.000 ± 0% 0.000 ± 0% -100.00% (p=0.002 n=6) ParallelHashOf-24 4.000 ± 0% 0.000 ± 0% -100.00% (p=0.002 n=6) geomean 4.000 ? ¹ ² ¹ summaries must be >0 to compute geomean ² ratios must be >0 to compute geomean │ b1 │ b2 │ │ allocs/op │ allocs/op vs base │ HashOf-24 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.002 n=6) ParallelHashOf-24 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.002 n=6) geomean 2.000 ? ¹ ² ¹ summaries must be >0 to compute geomean ² ratios must be >0 to compute geomean --- sql/cache.go | 13 +++++++++++-- sql/cache_test.go | 30 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/sql/cache.go b/sql/cache.go index 32861e9e37..94759be248 100644 --- a/sql/cache.go +++ b/sql/cache.go @@ -17,6 +17,7 @@ package sql import ( "fmt" "runtime" + "sync" "github.com/cespare/xxhash/v2" @@ -25,7 +26,9 @@ import ( // HashOf returns a hash of the given value to be used as key in a cache. 
func HashOf(v Row) (uint64, error) { - hash := xxhash.New() + hash := digestPool.Get().(*xxhash.Digest) + hash.Reset() + defer digestPool.Put(hash) for i, x := range v { if i > 0 { // separate each value in the row with a nil byte @@ -38,13 +41,19 @@ func HashOf(v Row) (uint64, error) { // TODO: we don't have the type info necessary to appropriately encode the value of a string with a non-standard // collation, which means that two strings that differ only in their collations will hash to the same value. // See rowexec/grouping_key() - if _, err := hash.Write([]byte(fmt.Sprintf("%v,", x))); err != nil { + if _, err := fmt.Fprintf(hash, "%v,", x); err != nil { return 0, err } } return hash.Sum64(), nil } +var digestPool = sync.Pool{ + New: func() any { + return xxhash.New() + }, +} + // ErrKeyNotFound is returned when the key could not be found in the cache. var ErrKeyNotFound = fmt.Errorf("memory: key not found in cache") diff --git a/sql/cache_test.go b/sql/cache_test.go index 1f6dd58f43..c93e04ad1b 100644 --- a/sql/cache_test.go +++ b/sql/cache_test.go @@ -177,3 +177,33 @@ func TestRowsCache(t *testing.T) { require.True(freed) }) } + +func BenchmarkHashOf(b *testing.B) { + row := NewRow(1, "1") + b.ResetTimer() + for i := 0; i < b.N; i++ { + sum, err := HashOf(row) + if err != nil { + b.Fatal(err) + } + if sum != 11268758894040352165 { + b.Fatalf("got %v", sum) + } + } +} + +func BenchmarkParallelHashOf(b *testing.B) { + row := NewRow(1, "1") + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + sum, err := HashOf(row) + if err != nil { + b.Fatal(err) + } + if sum != 11268758894040352165 { + b.Fatalf("got %v", sum) + } + } + }) +}