Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EN Performance] Optimize checkpoint serialization for -37GB operational RAM, -2.7 minutes duration, -19.6 million allocs (50% fewer allocs) #3050

Merged
merged 7 commits into from
Aug 23, 2022
11 changes: 5 additions & 6 deletions ledger/complete/mtrie/flattener/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package flattener
import (
"github.com/onflow/flow-go/ledger"
"github.com/onflow/flow-go/ledger/complete/mtrie/node"
"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
)

// NodeIterator is an iterator over the nodes in a trie.
Expand Down Expand Up @@ -76,13 +75,13 @@ type NodeIterator struct {
// as for each node, the children have been previously encountered.
// NodeIterator created by NewNodeIterator is safe for concurrent use
// because visitedNodes is always nil in this case.
func NewNodeIterator(mTrie *trie.MTrie) *NodeIterator {
func NewNodeIterator(n *node.Node) *NodeIterator {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for fixing this node iterator to be a proper node iterator.

// for a Trie with height H (measured by number of edges), the longest possible path contains H+1 vertices
stackSize := ledger.NodeMaxHeight + 1
i := &NodeIterator{
stack: make([]*node.Node, 0, stackSize),
}
i.unprocessedRoot = mTrie.RootNode()
i.unprocessedRoot = n
return i
}

Expand All @@ -98,15 +97,15 @@ func NewNodeIterator(mTrie *trie.MTrie) *NodeIterator {
// When re-building the Trie from the sequence of nodes, one can build the trie on the fly,
// as for each node, the children have been previously encountered.
// WARNING: visitedNodes is not safe for concurrent use.
func NewUniqueNodeIterator(mTrie *trie.MTrie, visitedNodes map[*node.Node]uint64) *NodeIterator {
func NewUniqueNodeIterator(n *node.Node, visitedNodes map[*node.Node]uint64) *NodeIterator {
// For a Trie with height H (measured by number of edges), the longest possible path
// contains H+1 vertices.
stackSize := ledger.NodeMaxHeight + 1
i := &NodeIterator{
stack: make([]*node.Node, 0, stackSize),
visitedNodes: visitedNodes,
}
i.unprocessedRoot = mTrie.RootNode()
i.unprocessedRoot = n
return i
}

Expand All @@ -115,7 +114,7 @@ func (i *NodeIterator) Next() bool {
// initial call to Next() for a non-empty trie
i.dig(i.unprocessedRoot)
i.unprocessedRoot = nil
return true
return len(i.stack) > 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did we change this? It seems like an important change, but I don't see why we would

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bug fix for a problem that didn't surface yet because of the way node iterator was used.

The bug is unique node iterator's Next() returns true when i.unprocessedRoot is visited already and i.stack is empty.

This bug doesn't happen when we iterate nodes of an entire trie (root nodes are always unique). In this PR, we iterate nodes of subtries and subtries can be shared and visited already. So instead of always returning true assuming there's at least one unique node when digging i.unprocessedRoot, we only return true when there are unique nodes in the internal stack after calling dig(i.unprocessedRoot).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation! It does make sense.
Would it be possible to maybe add a test catching this particular bug, and showing how the fix helps?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to maybe add a test catching this particular bug, and showing how the fix helps?

Test for this bug is already in iterator_test.go#L269-L396.

The test iterates 3 left substries and 3 right subtries (some subtries are shared). The test verifies that:

  • order of iterated nodes is descendents first
  • shared subtries/nodes are not iterated twice
  • non-nil node is returned (meaning as long as Next() returns true Value() returns a non-nil node)

}

// the current head of the stack, `n`, has been recalled
Expand Down
151 changes: 140 additions & 11 deletions ledger/complete/mtrie/flattener/iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
func TestEmptyTrie(t *testing.T) {
emptyTrie := trie.NewEmptyMTrie()

itr := flattener.NewNodeIterator(emptyTrie)
itr := flattener.NewNodeIterator(emptyTrie.RootNode())
require.True(t, nil == itr.Value()) // initial iterator should return nil

require.False(t, itr.Next())
Expand All @@ -30,7 +30,7 @@ func TestPopulatedTrie(t *testing.T) {
emptyTrie := trie.NewEmptyMTrie()

// key: 0000...
p1 := testutils.PathByUint8(1)
p1 := testutils.PathByUint8(0)
v1 := testutils.LightPayload8('A', 'a')

// key: 0100....
Expand All @@ -43,12 +43,12 @@ func TestPopulatedTrie(t *testing.T) {
testTrie, _, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true)
require.NoError(t, err)

for itr := flattener.NewNodeIterator(testTrie); itr.Next(); {
for itr := flattener.NewNodeIterator(testTrie.RootNode()); itr.Next(); {
fmt.Println(itr.Value().FmtStr("", ""))
fmt.Println()
}

itr := flattener.NewNodeIterator(testTrie)
itr := flattener.NewNodeIterator(testTrie.RootNode())

require.True(t, itr.Next())
p1_leaf := itr.Value()
Expand Down Expand Up @@ -80,13 +80,13 @@ func TestUniqueNodeIterator(t *testing.T) {
emptyTrie := trie.NewEmptyMTrie()

// visitedNodes is nil
itr := flattener.NewUniqueNodeIterator(emptyTrie, nil)
itr := flattener.NewUniqueNodeIterator(emptyTrie.RootNode(), nil)
require.False(t, itr.Next())
require.True(t, nil == itr.Value()) // initial iterator should return nil

// visitedNodes is empty map
visitedNodes := make(map[*node.Node]uint64)
itr = flattener.NewUniqueNodeIterator(emptyTrie, visitedNodes)
itr = flattener.NewUniqueNodeIterator(emptyTrie.RootNode(), visitedNodes)
require.False(t, itr.Next())
require.True(t, nil == itr.Value()) // initial iterator should return nil
})
Expand All @@ -95,7 +95,7 @@ func TestUniqueNodeIterator(t *testing.T) {
emptyTrie := trie.NewEmptyMTrie()

// key: 0000...
p1 := testutils.PathByUint8(1)
p1 := testutils.PathByUint8(0)
v1 := testutils.LightPayload8('A', 'a')

// key: 0100....
Expand Down Expand Up @@ -126,7 +126,7 @@ func TestUniqueNodeIterator(t *testing.T) {

// visitedNodes is nil
i := 0
for itr := flattener.NewUniqueNodeIterator(updatedTrie, nil); itr.Next(); {
for itr := flattener.NewUniqueNodeIterator(updatedTrie.RootNode(), nil); itr.Next(); {
n := itr.Value()
require.True(t, i < len(expectedNodes))
require.Equal(t, expectedNodes[i], n)
Expand All @@ -138,7 +138,7 @@ func TestUniqueNodeIterator(t *testing.T) {
// there isn't any shared sub-trie.
visitedNodes := make(map[*node.Node]uint64)
i = 0
for itr := flattener.NewUniqueNodeIterator(updatedTrie, visitedNodes); itr.Next(); {
for itr := flattener.NewUniqueNodeIterator(updatedTrie.RootNode(), visitedNodes); itr.Next(); {
n := itr.Value()
visitedNodes[n] = uint64(i)

Expand All @@ -157,7 +157,7 @@ func TestUniqueNodeIterator(t *testing.T) {
emptyTrie := trie.NewEmptyMTrie()

// key: 0000...
p1 := testutils.PathByUint8(1)
p1 := testutils.PathByUint8(0)
v1 := testutils.LightPayload8('A', 'a')

// key: 0100....
Expand Down Expand Up @@ -254,7 +254,7 @@ func TestUniqueNodeIterator(t *testing.T) {
visitedNodes := make(map[*node.Node]uint64)
i := 0
for _, trie := range tries {
for itr := flattener.NewUniqueNodeIterator(trie, visitedNodes); itr.Next(); {
for itr := flattener.NewUniqueNodeIterator(trie.RootNode(), visitedNodes); itr.Next(); {
n := itr.Value()
visitedNodes[n] = uint64(i)

Expand All @@ -265,4 +265,133 @@ func TestUniqueNodeIterator(t *testing.T) {
}
require.Equal(t, i, len(expectedNodes))
})

t.Run("subtries", func(t *testing.T) {

emptyTrie := trie.NewEmptyMTrie()

// key: 0000...
p1 := testutils.PathByUint8(0)
v1 := testutils.LightPayload8('A', 'a')

// key: 0100....
p2 := testutils.PathByUint8(64)
v2 := testutils.LightPayload8('B', 'b')

paths := []ledger.Path{p1, p2}
payloads := []ledger.Payload{*v1, *v2}

trie1, _, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true)
require.NoError(t, err)

// trie1
// n4
// /
// /
// n3
// / \
// / \
// n1 (p1/v1) n2 (p2/v2)
//

// New trie reuses its parent's left sub-trie.

// key: 1000...
p3 := testutils.PathByUint8(128)
v3 := testutils.LightPayload8('C', 'c')

// key: 1100....
p4 := testutils.PathByUint8(192)
v4 := testutils.LightPayload8('D', 'd')

paths = []ledger.Path{p3, p4}
payloads = []ledger.Payload{*v3, *v4}

trie2, _, err := trie.NewTrieWithUpdatedRegisters(trie1, paths, payloads, true)
require.NoError(t, err)

// trie2
// n8
// / \
// / \
// n3 n7
// (shared) / \
// / \
// n5 n6
// (p3/v3) (p4/v4)

// New trie reuses its parent's right sub-trie, and left sub-trie's leaf node.

// key: 0000...
v5 := testutils.LightPayload8('E', 'e')

paths = []ledger.Path{p1}
payloads = []ledger.Payload{*v5}

trie3, _, err := trie.NewTrieWithUpdatedRegisters(trie2, paths, payloads, true)
require.NoError(t, err)

// trie3
// n11
// / \
// / \
// n10 n7
// / \ (shared)
// / \
// n9 n2
// (p1/v5) (shared)

leftSubtries := []*node.Node{
trie1.RootNode().LeftChild(),
trie2.RootNode().LeftChild(),
trie3.RootNode().LeftChild(),
}

expectedNodesInLeftSubtries := []*node.Node{
// unique nodes from trie1
trie1.RootNode().LeftChild().LeftChild(), // n1
trie1.RootNode().LeftChild().RightChild(), // n2
trie1.RootNode().LeftChild(), // n3
// unique nodes from trie3
trie3.RootNode().LeftChild().LeftChild(), // n9
trie3.RootNode().LeftChild(), // n10
}

rightSubtries := []*node.Node{
trie1.RootNode().RightChild(),
trie2.RootNode().RightChild(),
trie3.RootNode().RightChild(),
}

expectedNodesInRightSubtries := []*node.Node{
// unique nodes from trie2
trie2.RootNode().RightChild().LeftChild(), // n5
trie2.RootNode().RightChild().RightChild(), // n6
trie2.RootNode().RightChild(), // n7
}

testcases := []struct {
roots []*node.Node
expectedNodes []*node.Node
}{
{leftSubtries, expectedNodesInLeftSubtries},
{rightSubtries, expectedNodesInRightSubtries},
}

for _, tc := range testcases {
visitedNodes := make(map[*node.Node]uint64)
i := 0
for _, n := range tc.roots {
for itr := flattener.NewUniqueNodeIterator(n, visitedNodes); itr.Next(); {
n := itr.Value()
visitedNodes[n] = uint64(i)

require.True(t, i < len(tc.expectedNodes))
require.Equal(t, tc.expectedNodes[i], n)
i++
}
}
require.Equal(t, i, len(tc.expectedNodes))
}
})
}
Loading