Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/testdata
10 changes: 5 additions & 5 deletions trie/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,24 +314,24 @@ func (db *Database) InsertBlob(hash common.Hash, blob []byte) {
db.lock.Lock()
defer db.lock.Unlock()

db.insert(hash, blob, rawNode(blob))
db.insert(hash, len(blob), rawNode(blob))
}

// insert inserts a collapsed trie node into the memory database. This method is
// a more generic version of InsertBlob, supporting both raw blob insertions as
// well ex trie node insertions. The blob must always be specified to allow proper
// well ex trie node insertions. The blob size must be specified to allow proper
// size tracking.
func (db *Database) insert(hash common.Hash, blob []byte, node node) {
func (db *Database) insert(hash common.Hash, size int, node node) {
// If the node's already cached, skip
if _, ok := db.dirties[hash]; ok {
return
}
memcacheDirtyWriteMeter.Mark(int64(len(blob)))
memcacheDirtyWriteMeter.Mark(int64(size))

// Create the cached entry for this node
entry := &cachedNode{
node: simplifyNode(node),
size: uint16(len(blob)),
size: uint16(size),
flushPrev: db.newest,
}
for _, child := range entry.childs() {
Expand Down
105 changes: 90 additions & 15 deletions trie/hasher.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package trie

import (
"fmt"
"hash"
"sync"

Expand All @@ -25,10 +26,17 @@ import (
"golang.org/x/crypto/sha3"
)

type Leaf struct {
size int
hash common.Hash
node node
}

type hasher struct {
tmp sliceBuffer
sha keccakState
onleaf LeafCallback
leafCh chan *Leaf
}

// keccakState wraps sha3.state. In addition to the usual hash methods, it also supports
Expand Down Expand Up @@ -63,10 +71,15 @@ var hasherPool = sync.Pool{
func newHasher(onleaf LeafCallback) *hasher {
h := hasherPool.Get().(*hasher)
h.onleaf = onleaf
if onleaf != nil {
h.leafCh = make(chan *Leaf, 200) // arbitrary number
}
return h
}

func returnHasherToPool(h *hasher) {
h.onleaf = nil
h.leafCh = nil
hasherPool.Put(h)
}

Expand Down Expand Up @@ -157,6 +170,37 @@ func (h *hasher) hashChildren(original node, db *Database) (node, node, error) {
}
}

// estimateSize estimates the size of an rlp-encoded node, without actually
// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
// with 1000 leafs, the only errors above 1% are on small shortnodes, where this
// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
func estimateSize(n node) int {
switch n := n.(type) {
case *shortNode:
// A short node contains a compacted key, and a value.
return 3 + len(n.Key) + estimateSize(n.Val)
case *fullNode:
// A full node contains up to 16 hashes (some nils), and a key
s := 3
for i := 0; i < 16; i++ {
if child := n.Children[i]; child != nil {
s += estimateSize(child)
} else {
s += 1
}
}
return s
case valueNode:
return 1 + len(n)
case hashNode:
return 1 + len(n)
default:
panic(fmt.Sprintf("node type %T", n))

}
return 0
}

// store hashes the node n and if we have a storage layer specified, it writes
// the key/value pair to it and tracks any node->child references as well as any
// node->external trie references.
Expand All @@ -178,16 +222,56 @@ func (h *hasher) store(n node, db *Database, force bool) (node, error) {
if hash == nil {
hash = h.makeHashNode(h.tmp)
}
// DEBUG todo remove me
// When we already have a cached hash, there's no need to rlp-encode the
// blob -- however, we stil need to use report the size to the database.
//actual := len(h.tmp)
//got := estimateSize(n)
//diff := math.Abs(1.0-float64(got)/float64(actual)) * 100
//if diff > 1.0 {
// fmt.Printf("actual: %d, got %d, diff : %02f%% (%T)\n", actual, got, diff, n)
//}

if db != nil {
// We are pooling the trie nodes into an intermediate memory cache
hash := common.BytesToHash(hash)

// If we're using channel-based leaf-reporting, send to channel.
// The leaf channel will be active only when there an active leaf-callback
if h.leafCh != nil {
h.leafCh <- &Leaf{
size: len(h.tmp),
hash: common.BytesToHash(hash),
node: n,
}
} else if db != nil {
// No leaf-callback used, but there's still a database. Do serial
// insertion
db.lock.Lock()
db.insert(hash, h.tmp, n)
db.insert(common.BytesToHash(hash), len(h.tmp), n)
db.lock.Unlock()

// Track external references from account->storage trie
}
return hash, nil
}

func (h *hasher) makeHashNode(data []byte) hashNode {
n := make(hashNode, h.sha.Size())
h.sha.Reset()
h.sha.Write(data)
h.sha.Read(n)
return n
}

// commitLoop does the actual insert + leaf callback for nodes
func (h *hasher) commitLoop(db *Database, wg *sync.WaitGroup) {
defer wg.Done()
for item := range h.leafCh {
var (
hash = item.hash
size = item.size
n = item.node
)
// We are pooling the trie nodes into an intermediate memory cache
db.lock.Lock()
db.insert(hash, size, n)
db.lock.Unlock()
if h.onleaf != nil {
switch n := n.(type) {
case *shortNode:
Expand All @@ -203,13 +287,4 @@ func (h *hasher) store(n node, db *Database, force bool) (node, error) {
}
}
}
return hash, nil
}

func (h *hasher) makeHashNode(data []byte) hashNode {
n := make(hashNode, h.sha.Size())
h.sha.Reset()
h.sha.Write(data)
h.sha.Read(n)
return n
}
17 changes: 16 additions & 1 deletion trie/trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package trie
import (
"bytes"
"fmt"
"sync"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
Expand Down Expand Up @@ -415,7 +416,21 @@ func (t *Trie) Commit(onleaf LeafCallback) (root common.Hash, err error) {
if t.db == nil {
panic("commit called on trie with nil database")
}
hash, cached, err := t.hashRoot(t.db, onleaf)
if t.root == nil {
return emptyRoot, nil
}
h := newHasher(onleaf)
defer returnHasherToPool(h)
var wg sync.WaitGroup
if onleaf != nil {
wg.Add(1)
go h.commitLoop(t.db, &wg)
}
hash, cached, err := h.hash(t.root, t.db, true)
if onleaf != nil {
close(h.leafCh)
wg.Wait()
}
if err != nil {
return common.Hash{}, err
}
Expand Down
141 changes: 141 additions & 0 deletions trie/trie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,147 @@ func BenchmarkHash(b *testing.B) {
trie.Hash()
}

type account struct {
Nonce uint64
Balance *big.Int
Root common.Hash
Code []byte
}

// Benchmarks the trie Commit following a Hash. Since the trie caches the result of any operation,
// we cannot use b.N as the number of hashing rouns, since all rounds apart from
// the first one will be NOOP. As such, we'll use b.N as the number of account to
// insert into the trie before measuring the hashing.
func BenchmarkCommitAfterHash(b *testing.B) {
b.Run("no-onleaf", func(b *testing.B) {
benchmarkCommitAfterHash(b, nil)
})
onleaf := func(leaf []byte, parent common.Hash) error {
var a account
rlp.DecodeBytes(leaf, &a)
return nil
}
b.Run("with-onleaf", func(b *testing.B) {
benchmarkCommitAfterHash(b, onleaf)
})
}

func benchmarkCommitAfterHash(b *testing.B, onleaf LeafCallback) {
// Make the random benchmark deterministic
random := rand.New(rand.NewSource(0))
// Create a realistic account trie to hash
addresses := make([][20]byte, b.N)
for i := 0; i < len(addresses); i++ {
for j := 0; j < len(addresses[i]); j++ {
addresses[i][j] = byte(random.Intn(256))
}
}
accounts := make([][]byte, len(addresses))
for i := 0; i < len(accounts); i++ {
var (
nonce = uint64(random.Int63())
balance = new(big.Int).Rand(random, new(big.Int).Exp(common.Big2, common.Big256, nil))
root = emptyRoot
code = crypto.Keccak256(nil)
)
accounts[i], _ = rlp.EncodeToBytes(&account{nonce, balance, root, code})
}
trie := newEmpty()
for i := 0; i < len(addresses); i++ {
trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i])
}
// Insert the accounts into the trie and hash it
trie.Hash()
b.ResetTimer()
b.ReportAllocs()
trie.Commit(onleaf)
}

func TestCommitAfterHash(t *testing.T) {
// Create a realistic account trie to hash
addresses, accounts := makeAccounts(1000)
trie := newEmpty()
for i := 0; i < len(addresses); i++ {
trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i])
}
// Insert the accounts into the trie and hash it
trie.Hash()
trie.Commit(nil)
}

func makeAccounts(size int) (addresses [][20]byte, accounts [][]byte) {
// Make the random benchmark deterministic
random := rand.New(rand.NewSource(0))
// Create a realistic account trie to hash
addresses = make([][20]byte, size)
for i := 0; i < len(addresses); i++ {
for j := 0; j < len(addresses[i]); j++ {
addresses[i][j] = byte(random.Intn(256))
}
}
accounts = make([][]byte, len(addresses))
for i := 0; i < len(accounts); i++ {
var (
nonce = uint64(random.Int63())
balance = new(big.Int).Rand(random, new(big.Int).Exp(common.Big2, common.Big256, nil))
root = emptyRoot
code = crypto.Keccak256(nil)
)
accounts[i], _ = rlp.EncodeToBytes(&account{nonce, balance, root, code})
}
return addresses, accounts
}

// BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie.
// This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically,
// storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple
// of thousand entries)
//
func BenchmarkCommitAfterHashFixedSize(b *testing.B) {
b.Run("10", func(b *testing.B) {
b.StopTimer()
acc, add := makeAccounts(20)
for i := 0; i < b.N; i++ {
benchmarkCommitAfterHashFixedSize(b, acc, add)
}
})
b.Run("100", func(b *testing.B) {
b.StopTimer()
acc, add := makeAccounts(100)
for i := 0; i < b.N; i++ {
benchmarkCommitAfterHashFixedSize(b, acc, add)
}
})

b.Run("1000", func(b *testing.B) {
b.StopTimer()
acc, add := makeAccounts(1000)
for i := 0; i < b.N; i++ {
benchmarkCommitAfterHashFixedSize(b, acc, add)
}
})
b.Run("10000", func(b *testing.B) {
b.StopTimer()
acc, add := makeAccounts(10000)
for i := 0; i < b.N; i++ {
benchmarkCommitAfterHashFixedSize(b, acc, add)
}
})
}

func benchmarkCommitAfterHashFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) {
b.ReportAllocs()
trie := newEmpty()
for i := 0; i < len(addresses); i++ {
trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i])
}
// Insert the accounts into the trie and hash it
trie.Hash()
b.StartTimer()
trie.Commit(nil)
b.StopTimer()
}

func tempDB() (string, *Database) {
dir, err := ioutil.TempDir("", "trie-bench")
if err != nil {
Expand Down