Skip to content

core/state: optimize Set #33544

Closed
cuiweixie wants to merge 1 commit into ethereum:master from
cuiweixie:storage
Closed

core/state: optimize Set #33544
cuiweixie wants to merge 1 commit into ethereum:master from
cuiweixie:storage

Conversation

@cuiweixie
Copy link
Copy Markdown
Contributor

benchmark code:

// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package state

import (
	"testing"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
)

// BenchmarkTransientStorageSet_NewAddress measures Set on a storage that starts
// out empty, writing one fixed (address, key, value) triple b.N times.
//
// NOTE(review): the address is never removed between iterations, so presumably
// only the very first call can take the "address not present yet" path and all
// later calls hit an already-present address — confirm against Set.
func BenchmarkTransientStorageSet_NewAddress(b *testing.B) {
	var (
		storage = newTransientStorage()
		account = common.HexToAddress("0x01")
		slot    = common.HexToHash("0x01")
		word    = common.HexToHash("0x1234")
	)

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		storage.Set(account, slot, word)
	}
}

// BenchmarkTransientStorageSet_ExistingAddress measures Set when the target
// address has already been written once before the timer starts. Every timed
// iteration rewrites the same (address, key, value) triple.
func BenchmarkTransientStorageSet_ExistingAddress(b *testing.B) {
	var (
		storage = newTransientStorage()
		account = common.HexToAddress("0x01")
		slot    = common.HexToHash("0x01")
		word    = common.HexToHash("0x1234")
	)

	// Seed the address outside the timed region.
	storage.Set(account, slot, word)

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		storage.Set(account, slot, word)
	}
}

// BenchmarkTransientStorageSet_UpdateValue measures overwriting a slot that was
// seeded with a different value before the timer starts. The timed loop writes
// the replacement value to the same (address, key) on every iteration.
func BenchmarkTransientStorageSet_UpdateValue(b *testing.B) {
	var (
		storage     = newTransientStorage()
		account     = common.HexToAddress("0x01")
		slot        = common.HexToHash("0x01")
		seeded      = common.HexToHash("0x1234")
		replacement = common.HexToHash("0x5678")
	)

	// Seed the slot with the initial value outside the timed region.
	storage.Set(account, slot, seeded)

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		storage.Set(account, slot, replacement)
	}
}

// BenchmarkTransientStorageSet_MultipleKeys measures Set while cycling through
// ten precomputed keys that all belong to a single address. Key i is the
// Keccak-256 hash of the byte i and value i is the hash of the byte i+100.
func BenchmarkTransientStorageSet_MultipleKeys(b *testing.B) {
	var (
		storage = newTransientStorage()
		account = common.HexToAddress("0x01")
		slots   = make([]common.Hash, 10)
		words   = make([]common.Hash, 10)
	)
	for k := range slots {
		slots[k] = common.BytesToHash(crypto.Keccak256([]byte{byte(k)}))
		words[k] = common.BytesToHash(crypto.Keccak256([]byte{byte(k + 100)}))
	}

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Round-robin over the ten slots.
		pick := n % 10
		storage.Set(account, slots[pick], words[pick])
	}
}

// BenchmarkTransientStorageSet_Delete measures a clear-then-restore cycle on a
// single slot: each iteration writes the zero hash (the delete marker, per the
// original author's note) and then writes the real value back.
//
// NOTE(review): the slot is the only one stored for the address, so this
// presumably follows the same path as the DeleteLastKey benchmark — confirm.
func BenchmarkTransientStorageSet_Delete(b *testing.B) {
	var (
		storage = newTransientStorage()
		account = common.HexToAddress("0x01")
		slot    = common.HexToHash("0x01")
		word    = common.HexToHash("0x1234")
		zero    = common.Hash{} // delete marker
	)

	// Seed the slot outside the timed region.
	storage.Set(account, slot, word)

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		storage.Set(account, slot, zero)
		// Restore the slot so the next iteration has something to clear.
		storage.Set(account, slot, word)
	}
}

// BenchmarkTransientStorageSet_DeleteLastKey measures a write-then-clear cycle
// on the only slot of an address, starting from an empty storage. According to
// the original author's note, clearing the last slot is the path that also
// drops the address entry itself.
func BenchmarkTransientStorageSet_DeleteLastKey(b *testing.B) {
	var (
		storage = newTransientStorage()
		account = common.HexToAddress("0x01")
		slot    = common.HexToHash("0x01")
		word    = common.HexToHash("0x1234")
		zero    = common.Hash{}
	)

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Write the sole slot, then clear it again with the zero hash.
		storage.Set(account, slot, word)
		storage.Set(account, slot, zero)
	}
}

// BenchmarkTransientStorageSet_DeleteNonLastKey measures a clear-then-restore
// cycle on one slot while a second slot of the same address stays populated, so
// the address still holds another slot after each clear.
func BenchmarkTransientStorageSet_DeleteNonLastKey(b *testing.B) {
	var (
		storage = newTransientStorage()
		account = common.HexToAddress("0x01")
		cycled  = common.HexToHash("0x01")
		kept    = common.HexToHash("0x02")
		word    = common.HexToHash("0x1234")
		zero    = common.Hash{}
	)

	// Seed both slots outside the timed region.
	storage.Set(account, cycled, word)
	storage.Set(account, kept, word)

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Only the first slot is cleared and rewritten; the second is untouched.
		storage.Set(account, cycled, zero)
		storage.Set(account, cycled, word)
	}
}

// BenchmarkTransientStorageSet_MultipleAddresses measures Set while cycling
// through ten precomputed addresses, writing the same key and value to each.
// Address i is built from the first 20 bytes of the Keccak-256 hash of byte i.
func BenchmarkTransientStorageSet_MultipleAddresses(b *testing.B) {
	var (
		storage  = newTransientStorage()
		accounts = make([]common.Address, 10)
		slot     = common.HexToHash("0x01")
		word     = common.HexToHash("0x1234")
	)
	for k := range accounts {
		digest := crypto.Keccak256([]byte{byte(k)})
		accounts[k] = common.BytesToAddress(digest[:20])
	}

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Round-robin over the ten addresses.
		storage.Set(accounts[n%10], slot, word)
	}
}

// BenchmarkTransientStorageSet_MixedOperations measures a fixed sequence of six
// Set calls per iteration, mixing inserts, an overwrite and two clears across
// two addresses.
//
// NOTE(review): the first address keeps its first slot populated at the end of
// every iteration, so presumably it is only "new" on the very first pass.
func BenchmarkTransientStorageSet_MixedOperations(b *testing.B) {
	var (
		storage  = newTransientStorage()
		accountA = common.HexToAddress("0x01")
		accountB = common.HexToAddress("0x02")
		slotX    = common.HexToHash("0x01")
		slotY    = common.HexToHash("0x02")
		wordP    = common.HexToHash("0x1234")
		wordQ    = common.HexToHash("0x5678")
		zero     = common.Hash{}
	)

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Write the first slot of the first address.
		storage.Set(accountA, slotX, wordP)
		// Add a second slot to the same address.
		storage.Set(accountA, slotY, wordQ)
		// Overwrite the first slot with a different value.
		storage.Set(accountA, slotX, wordQ)
		// Write a slot on the second address.
		storage.Set(accountB, slotX, wordP)
		// Clear the second slot of the first address (another slot remains).
		storage.Set(accountA, slotY, zero)
		// Clear the only slot of the second address.
		storage.Set(accountB, slotX, zero)
	}
}

// BenchmarkTransientStorageSet_SequentialSameAddress measures a burst of 100
// back-to-back Set calls on one address per benchmark iteration, each call
// targeting a different precomputed key. Key i is the Keccak-256 hash of the
// byte i; value i is the hash of byte(i + 200).
func BenchmarkTransientStorageSet_SequentialSameAddress(b *testing.B) {
	var (
		storage = newTransientStorage()
		account = common.HexToAddress("0x01")
		slots   = make([]common.Hash, 100)
		words   = make([]common.Hash, 100)
	)
	for k := range slots {
		slots[k] = common.BytesToHash(crypto.Keccak256([]byte{byte(k)}))
		words[k] = common.BytesToHash(crypto.Keccak256([]byte{byte(k + 200)}))
	}

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// One iteration is a full pass over all 100 slots of the address.
		for k := range slots {
			storage.Set(account, slots[k], words[k])
		}
	}
}

// BenchmarkTransientStorageSet_LargeStorage benchmarks Set against an address
// whose storage has been pre-populated with a large number of slots.
//
// The loop index is spread over two bytes when deriving keys and values. A
// plain byte(i) conversion keeps only the low 8 bits, which would collapse the
// 1000 pre-population rounds onto 256 distinct keys and leave the storage far
// smaller than intended.
func BenchmarkTransientStorageSet_LargeStorage(b *testing.B) {
	const prefill = 1000 // number of distinct slots written before timing starts

	ts := newTransientStorage()
	addr := common.HexToAddress("0x01")
	newKey := common.HexToHash("0x9999")
	value := common.HexToHash("0x1234")

	// Pre-populate with prefill distinct keys.
	for i := 0; i < prefill; i++ {
		// Offset the value preimage so it never coincides with a key preimage.
		v := i + prefill
		key := common.BytesToHash(crypto.Keccak256([]byte{byte(i >> 8), byte(i)}))
		val := common.BytesToHash(crypto.Keccak256([]byte{byte(v >> 8), byte(v)}))
		ts.Set(addr, key, val)
	}

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		// Presumably newKey is inserted on the first iteration and merely
		// overwritten on every later one.
		ts.Set(addr, newKey, value)
	}
}

// BenchmarkTransientStorageSet_AlternatingDelete measures Set on a single slot
// while alternating the written value: even iterations store the real value,
// odd iterations store the zero hash.
func BenchmarkTransientStorageSet_AlternatingDelete(b *testing.B) {
	var (
		storage = newTransientStorage()
		account = common.HexToAddress("0x01")
		slot    = common.HexToHash("0x01")
		// Indexed by iteration parity: 0 selects the real value, 1 the zero hash.
		writes = [2]common.Hash{common.HexToHash("0x1234"), {}}
	)

	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		storage.Set(account, slot, writes[n%2])
	}
}

results in:

goos: darwin
goarch: arm64
pkg: github.com/ethereum/go-ethereum/core/state
cpu: Apple M4
                                             │   old.txt    │               new.txt               │
                                             │    sec/op    │   sec/op     vs base                │
TransientStorageSet_NewAddress-10              19.90n ±  1%   14.17n ± 1%  -28.79% (p=0.000 n=10)
TransientStorageSet_ExistingAddress-10         19.98n ±  1%   14.52n ± 1%  -27.35% (p=0.000 n=10)
TransientStorageSet_UpdateValue-10             19.91n ±  0%   14.54n ± 1%  -26.95% (p=0.000 n=10)
TransientStorageSet_MultipleKeys-10            20.15n ±  9%   14.81n ± 0%  -26.53% (p=0.000 n=10)
TransientStorageSet_Delete-10                  129.0n ±  0%   114.6n ± 7%  -11.16% (p=0.001 n=10)
TransientStorageSet_DeleteLastKey-10           131.4n ±  1%   116.6n ± 5%  -11.23% (p=0.000 n=10)
TransientStorageSet_DeleteNonLastKey-10        49.61n ±  0%   33.96n ± 0%  -31.54% (p=0.000 n=10)
TransientStorageSet_MultipleAddresses-10       21.03n ±  1%   20.91n ± 4%   -0.57% (p=0.014 n=10)
TransientStorageSet_MixedOperations-10         225.1n ±  1%   173.1n ± 0%  -23.08% (p=0.000 n=10)
TransientStorageSet_SequentialSameAddress-10   2.180µ ±  2%   1.605µ ± 6%  -26.40% (p=0.000 n=10)
TransientStorageSet_LargeStorage-10            20.53n ± 10%   15.28n ± 2%  -25.57% (p=0.000 n=10)
TransientStorageSet_AlternatingDelete-10       66.81n ±  1%   57.02n ± 4%  -14.65% (p=0.000 n=10)
geomean                                        59.32n         46.48n       -21.64%

                                             │   old.txt    │               new.txt               │
                                             │     B/op     │    B/op     vs base                 │
TransientStorageSet_NewAddress-10              0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_ExistingAddress-10         0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_UpdateValue-10             0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_MultipleKeys-10            0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_Delete-10                  624.0 ± 0%     624.0 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_DeleteLastKey-10           624.0 ± 0%     624.0 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_DeleteNonLastKey-10        0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_MultipleAddresses-10       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_MixedOperations-10         624.0 ± 0%     624.0 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_SequentialSameAddress-10   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_LargeStorage-10            0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_AlternatingDelete-10       312.0 ± 0%     312.0 ± 0%       ~ (p=1.000 n=10) ¹
geomean                                                   ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

                                             │   old.txt    │               new.txt               │
                                             │  allocs/op   │ allocs/op   vs base                 │
TransientStorageSet_NewAddress-10              0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_ExistingAddress-10         0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_UpdateValue-10             0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_MultipleKeys-10            0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_Delete-10                  2.000 ± 0%     2.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_DeleteLastKey-10           2.000 ± 0%     2.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_DeleteNonLastKey-10        0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_MultipleAddresses-10       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_MixedOperations-10         2.000 ± 0%     2.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_SequentialSameAddress-10   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_LargeStorage-10            0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
TransientStorageSet_AlternatingDelete-10       1.000 ± 0%     1.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                                                   ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

Copy link
Copy Markdown
Member

@MariusVanDerWijden MariusVanDerWijden left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM, very interesting that there is such a big difference

@rjl493456442
Copy link
Copy Markdown
Member

Thanks for the contribution. While the unit tests show a significant speedup with these changes, the impact on overall performance appears to be negligible.

For that reason, we would prefer not to tweak minor code paths purely for performance-related optimizations.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants