diff --git a/tests/testdata b/tests/testdata index b5eb9900ee21..25f480521dae 160000 --- a/tests/testdata +++ b/tests/testdata @@ -1 +1 @@ -Subproject commit b5eb9900ee2147b40d3e681fe86efa4fd693959a +Subproject commit 25f480521dae1937841bbcb034e862c4dfd53256 diff --git a/trie/database.go b/trie/database.go index b0fd78744492..6f4c379689ed 100644 --- a/trie/database.go +++ b/trie/database.go @@ -314,24 +314,24 @@ func (db *Database) InsertBlob(hash common.Hash, blob []byte) { db.lock.Lock() defer db.lock.Unlock() - db.insert(hash, blob, rawNode(blob)) + db.insert(hash, len(blob), rawNode(blob)) } // insert inserts a collapsed trie node into the memory database. This method is // a more generic version of InsertBlob, supporting both raw blob insertions as -// well ex trie node insertions. The blob must always be specified to allow proper +// well ex trie node insertions. The blob size must be specified to allow proper // size tracking. -func (db *Database) insert(hash common.Hash, blob []byte, node node) { +func (db *Database) insert(hash common.Hash, size int, node node) { // If the node's already cached, skip if _, ok := db.dirties[hash]; ok { return } - memcacheDirtyWriteMeter.Mark(int64(len(blob))) + memcacheDirtyWriteMeter.Mark(int64(size)) // Create the cached entry for this node entry := &cachedNode{ node: simplifyNode(node), - size: uint16(len(blob)), + size: uint16(size), flushPrev: db.newest, } for _, child := range entry.childs() { diff --git a/trie/hasher.go b/trie/hasher.go index 54f6a9de2b6a..b0eaf14205da 100644 --- a/trie/hasher.go +++ b/trie/hasher.go @@ -17,6 +17,7 @@ package trie import ( + "fmt" "hash" "sync" @@ -25,10 +26,17 @@ import ( "golang.org/x/crypto/sha3" ) +type Leaf struct { + size int + hash common.Hash + node node +} + type hasher struct { tmp sliceBuffer sha keccakState onleaf LeafCallback + leafCh chan *Leaf } // keccakState wraps sha3.state. In addition to the usual hash methods, it also supports @@ -63,10 +71,15 @@ var hasherPool = sync.Pool{ func newHasher(onleaf LeafCallback) *hasher { h := hasherPool.Get().(*hasher) h.onleaf = onleaf + if onleaf != nil { + h.leafCh = make(chan *Leaf, 200) // arbitrary number + } return h } func returnHasherToPool(h *hasher) { + h.onleaf = nil + h.leafCh = nil hasherPool.Put(h) } @@ -157,6 +170,37 @@ func (h *hasher) hashChildren(original node, db *Database) (node, node, error) { } } +// estimateSize estimates the size of an rlp-encoded node, without actually +// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie +// with 1000 leafs, the only errors above 1% are on small shortnodes, where this +// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35) +func estimateSize(n node) int { + switch n := n.(type) { + case *shortNode: + // A short node contains a compacted key, and a value. + return 3 + len(n.Key) + estimateSize(n.Val) + case *fullNode: + // A full node contains up to 16 hashes (some nils), and a key + s := 3 + for i := 0; i < 16; i++ { + if child := n.Children[i]; child != nil { + s += estimateSize(child) + } else { + s += 1 + } + } + return s + case valueNode: + return 1 + len(n) + case hashNode: + return 1 + len(n) + default: + panic(fmt.Sprintf("node type %T", n)) + + } + return 0 +} + // store hashes the node n and if we have a storage layer specified, it writes // the key/value pair to it and tracks any node->child references as well as any // node->external trie references. @@ -178,16 +222,56 @@ func (h *hasher) store(n node, db *Database, force bool) (node, error) { if hash == nil { hash = h.makeHashNode(h.tmp) } + // DEBUG todo remove me + // When we already have a cached hash, there's no need to rlp-encode the + // blob -- however, we stil need to use report the size to the database. + //actual := len(h.tmp) + //got := estimateSize(n) + //diff := math.Abs(1.0-float64(got)/float64(actual)) * 100 + //if diff > 1.0 { + // fmt.Printf("actual: %d, got %d, diff : %02f%% (%T)\n", actual, got, diff, n) + //} - if db != nil { - // We are pooling the trie nodes into an intermediate memory cache - hash := common.BytesToHash(hash) - + // If we're using channel-based leaf-reporting, send to channel. + // The leaf channel will be active only when there an active leaf-callback + if h.leafCh != nil { + h.leafCh <- &Leaf{ + size: len(h.tmp), + hash: common.BytesToHash(hash), + node: n, + } + } else if db != nil { + // No leaf-callback used, but there's still a database. Do serial + // insertion db.lock.Lock() - db.insert(hash, h.tmp, n) + db.insert(common.BytesToHash(hash), len(h.tmp), n) db.lock.Unlock() - // Track external references from account->storage trie + } + return hash, nil +} + +func (h *hasher) makeHashNode(data []byte) hashNode { + n := make(hashNode, h.sha.Size()) + h.sha.Reset() + h.sha.Write(data) + h.sha.Read(n) + return n +} + +// commitLoop does the actual insert + leaf callback for nodes +func (h *hasher) commitLoop(db *Database, wg *sync.WaitGroup) { + defer wg.Done() + for item := range h.leafCh { + var ( + hash = item.hash + size = item.size + n = item.node + ) + // We are pooling the trie nodes into an intermediate memory cache + db.lock.Lock() + db.insert(hash, size, n) + db.lock.Unlock() if h.onleaf != nil { switch n := n.(type) { case *shortNode: @@ -203,13 +287,4 @@ func (h *hasher) store(n node, db *Database, force bool) (node, error) { } } } - return hash, nil -} - -func (h *hasher) makeHashNode(data []byte) hashNode { - n := make(hashNode, h.sha.Size()) - h.sha.Reset() - h.sha.Write(data) - h.sha.Read(n) - return n } diff --git a/trie/trie.go b/trie/trie.go index 920e331fd62f..60a96b02a3ef 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -20,6 +20,7 @@ package trie import ( "bytes" "fmt" + "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" @@ -415,7 +416,21 @@ func (t *Trie) Commit(onleaf LeafCallback) (root common.Hash, err error) { if t.db == nil { panic("commit called on trie with nil database") } - hash, cached, err := t.hashRoot(t.db, onleaf) + if t.root == nil { + return emptyRoot, nil + } + h := newHasher(onleaf) + defer returnHasherToPool(h) + var wg sync.WaitGroup + if onleaf != nil { + wg.Add(1) + go h.commitLoop(t.db, &wg) + } + hash, cached, err := h.hash(t.root, t.db, true) + if onleaf != nil { + close(h.leafCh) + wg.Wait() + } if err != nil { return common.Hash{}, err } diff --git a/trie/trie_test.go b/trie/trie_test.go index e53ac568e9c3..7cbf1862a255 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -512,6 +512,147 @@ func BenchmarkHash(b *testing.B) { trie.Hash() } +type account struct { + Nonce uint64 + Balance *big.Int + Root common.Hash + Code []byte +} + +// Benchmarks the trie Commit following a Hash. Since the trie caches the result of any operation, +// we cannot use b.N as the number of hashing rouns, since all rounds apart from +// the first one will be NOOP. As such, we'll use b.N as the number of account to +// insert into the trie before measuring the hashing. +func BenchmarkCommitAfterHash(b *testing.B) { + b.Run("no-onleaf", func(b *testing.B) { + benchmarkCommitAfterHash(b, nil) + }) + onleaf := func(leaf []byte, parent common.Hash) error { + var a account + rlp.DecodeBytes(leaf, &a) + return nil + } + b.Run("with-onleaf", func(b *testing.B) { + benchmarkCommitAfterHash(b, onleaf) + }) +} + +func benchmarkCommitAfterHash(b *testing.B, onleaf LeafCallback) { + // Make the random benchmark deterministic + random := rand.New(rand.NewSource(0)) + // Create a realistic account trie to hash + addresses := make([][20]byte, b.N) + for i := 0; i < len(addresses); i++ { + for j := 0; j < len(addresses[i]); j++ { + addresses[i][j] = byte(random.Intn(256)) + } + } + accounts := make([][]byte, len(addresses)) + for i := 0; i < len(accounts); i++ { + var ( + nonce = uint64(random.Int63()) + balance = new(big.Int).Rand(random, new(big.Int).Exp(common.Big2, common.Big256, nil)) + root = emptyRoot + code = crypto.Keccak256(nil) + ) + accounts[i], _ = rlp.EncodeToBytes(&account{nonce, balance, root, code}) + } + trie := newEmpty() + for i := 0; i < len(addresses); i++ { + trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) + } + // Insert the accounts into the trie and hash it + trie.Hash() + b.ResetTimer() + b.ReportAllocs() + trie.Commit(onleaf) +} + +func TestCommitAfterHash(t *testing.T) { + // Create a realistic account trie to hash + addresses, accounts := makeAccounts(1000) + trie := newEmpty() + for i := 0; i < len(addresses); i++ { + trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) + } + // Insert the accounts into the trie and hash it + trie.Hash() + trie.Commit(nil) +} + +func makeAccounts(size int) (addresses [][20]byte, accounts [][]byte) { + // Make the random benchmark deterministic + random := rand.New(rand.NewSource(0)) + // Create a realistic account trie to hash + addresses = make([][20]byte, size) + for i := 0; i < len(addresses); i++ { + for j := 0; j < len(addresses[i]); j++ { + addresses[i][j] = byte(random.Intn(256)) + } + } + accounts = make([][]byte, len(addresses)) + for i := 0; i < len(accounts); i++ { + var ( + nonce = uint64(random.Int63()) + balance = new(big.Int).Rand(random, new(big.Int).Exp(common.Big2, common.Big256, nil)) + root = emptyRoot + code = crypto.Keccak256(nil) + ) + accounts[i], _ = rlp.EncodeToBytes(&account{nonce, balance, root, code}) + } + return addresses, accounts +} + +// BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie. +// This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically, +// storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple +// of thousand entries) +// +func BenchmarkCommitAfterHashFixedSize(b *testing.B) { + b.Run("10", func(b *testing.B) { + b.StopTimer() + acc, add := makeAccounts(20) + for i := 0; i < b.N; i++ { + benchmarkCommitAfterHashFixedSize(b, acc, add) + } + }) + b.Run("100", func(b *testing.B) { + b.StopTimer() + acc, add := makeAccounts(100) + for i := 0; i < b.N; i++ { + benchmarkCommitAfterHashFixedSize(b, acc, add) + } + }) + + b.Run("1000", func(b *testing.B) { + b.StopTimer() + acc, add := makeAccounts(1000) + for i := 0; i < b.N; i++ { + benchmarkCommitAfterHashFixedSize(b, acc, add) + } + }) + b.Run("10000", func(b *testing.B) { + b.StopTimer() + acc, add := makeAccounts(10000) + for i := 0; i < b.N; i++ { + benchmarkCommitAfterHashFixedSize(b, acc, add) + } + }) +} + +func benchmarkCommitAfterHashFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) { + b.ReportAllocs() + trie := newEmpty() + for i := 0; i < len(addresses); i++ { + trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i]) + } + // Insert the accounts into the trie and hash it + trie.Hash() + b.StartTimer() + trie.Commit(nil) + b.StopTimer() +} + func tempDB() (string, *Database) { dir, err := ioutil.TempDir("", "trie-bench") if err != nil {