Skip to content

Commit 04e5c08

Browse files
committed
Add NewUniqueNodeIterator() to skip shared nodes
NewUniqueNodeIterator() can be used to optimize node iteration for forest. It skips shared sub-tries that were visited and only iterates unique nodes.
1 parent 45d23da commit 04e5c08

File tree

2 files changed

+236
-5
lines changed

2 files changed

+236
-5
lines changed

ledger/complete/mtrie/flattener/iterator.go

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type NodeIterator struct {
3838
// no children, it can be recalled without restriction.
3939
// * When popping node `n` from the stack, its parent `p` (if it exists) is now the
4040
// head of the stack.
41-
// - If `p` has only one child, this child is must be `n`.
41+
// - If `p` has only one child, this child must be `n`.
4242
// Therefore, by recalling `n`, we have recalled all descendants of `p`.
4343
// - If `n` is the right child, we have already searched through all of `p`'s
4444
// descendants (as the `p.LeftChild` must have been searched before)
@@ -53,6 +53,15 @@ type NodeIterator struct {
5353
// This has the advantage, that we gracefully handle tries whose root node is nil.
5454
unprocessedRoot *node.Node
5555
stack []*node.Node
56+
// visitedNodes are nodes that were visited and can be skipped during
57+
// traversal through dig(). visitedNodes is used to optimize node traversal
58+
// IN FOREST by skipping nodes in shared sub-tries after they are visited,
59+
// because sub-tries are shared between tries (original MTrie before register updates
60+
// and updated MTrie after register writes).
61+
// NodeIterator only reads from visitedNodes; it never writes to it.
62+
// No special handling is needed if visitedNodes is nil.
63+
// WARNING: visitedNodes is not safe for concurrent use.
64+
visitedNodes map[*node.Node]uint64
5665
}
5766

5867
// NewNodeIterator returns a node NodeIterator, which iterates through all nodes
@@ -75,6 +84,30 @@ func NewNodeIterator(mTrie *trie.MTrie) *NodeIterator {
7584
return i
7685
}
7786

87+
// NewUniqueNodeIterator returns a node NodeIterator, which iterates through all unique nodes
88+
// that weren't visited. This should be used for forest node iteration to avoid repeatedly
89+
// traversing shared sub-tries.
90+
// The Iterator guarantees a DESCENDANTS-FIRST-RELATIONSHIP in the sequence of nodes it generates:
91+
// * Consider the sequence of nodes, in the order they are generated by NodeIterator.
92+
// Let `node[k]` denote the node with index `k` in this sequence.
93+
// * Descendants-First-Relationship means that for any `node[k]`, all its descendants
94+
// have indices strictly smaller than k in the iterator's sequence.
95+
// The Descendants-First-Relationship has the following important property:
96+
// When re-building the Trie from the sequence of nodes, one can build the trie on the fly,
97+
// as for each node, the children have been previously encountered.
98+
// WARNING: visitedNodes is not safe for concurrent use.
99+
func NewUniqueNodeIterator(mTrie *trie.MTrie, visitedNodes map[*node.Node]uint64) *NodeIterator {
100+
// For a Trie with height H (measured by number of edges), the longest possible path
101+
// contains H+1 vertices.
102+
stackSize := ledger.NodeMaxHeight + 1
103+
i := &NodeIterator{
104+
stack: make([]*node.Node, 0, stackSize),
105+
visitedNodes: visitedNodes,
106+
}
107+
i.unprocessedRoot = mTrie.RootNode()
108+
return i
109+
}
110+
78111
func (i *NodeIterator) Next() bool {
79112
if i.unprocessedRoot != nil {
80113
// initial call to Next() for a non-empty trie
@@ -125,15 +158,22 @@ func (i *NodeIterator) dig(n *node.Node) {
125158
if n == nil {
126159
return
127160
}
161+
if _, found := i.visitedNodes[n]; found {
162+
return
163+
}
128164
for {
129165
i.stack = append(i.stack, n)
130166
if lChild := n.LeftChild(); lChild != nil {
131-
n = lChild
132-
continue
167+
if _, found := i.visitedNodes[lChild]; !found {
168+
n = lChild
169+
continue
170+
}
133171
}
134172
if rChild := n.RightChild(); rChild != nil {
135-
n = rChild
136-
continue
173+
if _, found := i.visitedNodes[rChild]; !found {
174+
n = rChild
175+
continue
176+
}
137177
}
138178
return
139179
}

ledger/complete/mtrie/flattener/iterator_test.go

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"github.com/onflow/flow-go/ledger"
1010
"github.com/onflow/flow-go/ledger/common/utils"
1111
"github.com/onflow/flow-go/ledger/complete/mtrie/flattener"
12+
"github.com/onflow/flow-go/ledger/complete/mtrie/node"
1213
"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
1314
)
1415

@@ -73,3 +74,193 @@ func TestPopulatedTrie(t *testing.T) {
7374
require.False(t, itr.Next())
7475
require.True(t, nil == itr.Value())
7576
}
77+
78+
func TestUniqueNodeIterator(t *testing.T) {
79+
t.Run("empty trie", func(t *testing.T) {
80+
emptyTrie := trie.NewEmptyMTrie()
81+
82+
// visitedNodes is nil
83+
itr := flattener.NewUniqueNodeIterator(emptyTrie, nil)
84+
require.False(t, itr.Next())
85+
require.True(t, nil == itr.Value()) // initial iterator should return nil
86+
87+
// visitedNodes is empty map
88+
visitedNodes := make(map[*node.Node]uint64)
89+
itr = flattener.NewUniqueNodeIterator(emptyTrie, visitedNodes)
90+
require.False(t, itr.Next())
91+
require.True(t, nil == itr.Value()) // initial iterator should return nil
92+
})
93+
94+
t.Run("trie", func(t *testing.T) {
95+
emptyTrie := trie.NewEmptyMTrie()
96+
97+
// key: 0000...
98+
p1 := utils.PathByUint8(1)
99+
v1 := utils.LightPayload8('A', 'a')
100+
101+
// key: 0100....
102+
p2 := utils.PathByUint8(64)
103+
v2 := utils.LightPayload8('B', 'b')
104+
105+
paths := []ledger.Path{p1, p2}
106+
payloads := []ledger.Payload{*v1, *v2}
107+
108+
updatedTrie, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true)
109+
require.NoError(t, err)
110+
111+
// n4
112+
// /
113+
// /
114+
// n3
115+
// / \
116+
// / \
117+
// n1 (p1/v1) n2 (p2/v2)
118+
//
119+
120+
expectedNodes := []*node.Node{
121+
updatedTrie.RootNode().LeftChild().LeftChild(), // n1
122+
updatedTrie.RootNode().LeftChild().RightChild(), // n2
123+
updatedTrie.RootNode().LeftChild(), // n3
124+
updatedTrie.RootNode(), // n4
125+
}
126+
127+
// visitedNodes is nil
128+
i := 0
129+
for itr := flattener.NewUniqueNodeIterator(updatedTrie, nil); itr.Next(); {
130+
n := itr.Value()
131+
require.True(t, i < len(expectedNodes))
132+
require.Equal(t, expectedNodes[i], n)
133+
i++
134+
}
135+
require.Equal(t, i, len(expectedNodes))
136+
137+
// visitedNodes is not nil, but it's pointless for iterating a single trie because
138+
// there isn't any shared sub-trie.
139+
visitedNodes := make(map[*node.Node]uint64)
140+
i = 0
141+
for itr := flattener.NewUniqueNodeIterator(updatedTrie, visitedNodes); itr.Next(); {
142+
n := itr.Value()
143+
visitedNodes[n] = uint64(i)
144+
145+
require.True(t, i < len(expectedNodes))
146+
require.Equal(t, expectedNodes[i], n)
147+
i++
148+
}
149+
require.Equal(t, i, len(expectedNodes))
150+
})
151+
152+
t.Run("forest", func(t *testing.T) {
153+
154+
// Forest is a slice of mtries to guarantee order.
155+
f := make([]*trie.MTrie, 0)
156+
157+
emptyTrie := trie.NewEmptyMTrie()
158+
159+
// key: 0000...
160+
p1 := utils.PathByUint8(1)
161+
v1 := utils.LightPayload8('A', 'a')
162+
163+
// key: 0100....
164+
p2 := utils.PathByUint8(64)
165+
v2 := utils.LightPayload8('B', 'b')
166+
167+
paths := []ledger.Path{p1, p2}
168+
payloads := []ledger.Payload{*v1, *v2}
169+
170+
trie1, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true)
171+
require.NoError(t, err)
172+
173+
f = append(f, trie1)
174+
175+
// n4
176+
// /
177+
// /
178+
// n3
179+
// / \
180+
// / \
181+
// n1 (p1/v1) n2 (p2/v2)
182+
//
183+
184+
// New trie reuses its parent's left sub-trie.
185+
186+
// key: 1000...
187+
p3 := utils.PathByUint8(128)
188+
v3 := utils.LightPayload8('C', 'c')
189+
190+
// key: 1100....
191+
p4 := utils.PathByUint8(192)
192+
v4 := utils.LightPayload8('D', 'd')
193+
194+
paths = []ledger.Path{p3, p4}
195+
payloads = []ledger.Payload{*v3, *v4}
196+
197+
trie2, err := trie.NewTrieWithUpdatedRegisters(trie1, paths, payloads, true)
198+
require.NoError(t, err)
199+
200+
f = append(f, trie2)
201+
202+
// n8
203+
// / \
204+
// / \
205+
// n3 n7
206+
// (shared) / \
207+
// / \
208+
// n5 n6
209+
// (p3/v3) (p4/v4)
210+
211+
// New trie reuses its parent's right sub-trie, and left sub-trie's leaf node.
212+
213+
// key: 0000...
214+
v5 := utils.LightPayload8('E', 'e')
215+
216+
paths = []ledger.Path{p1}
217+
payloads = []ledger.Payload{*v5}
218+
219+
trie3, err := trie.NewTrieWithUpdatedRegisters(trie2, paths, payloads, true)
220+
require.NoError(t, err)
221+
222+
f = append(f, trie3)
223+
224+
// n11
225+
// / \
226+
// / \
227+
// n10 n7
228+
// / \ (shared)
229+
// / \
230+
// n9 n2
231+
// (p1/v5) (shared)
232+
233+
expectedNodes := []*node.Node{
234+
// unique nodes from trie1
235+
trie1.RootNode().LeftChild().LeftChild(), // n1
236+
trie1.RootNode().LeftChild().RightChild(), // n2
237+
trie1.RootNode().LeftChild(), // n3
238+
trie1.RootNode(), // n4
239+
// unique nodes from trie2
240+
trie2.RootNode().RightChild().LeftChild(), // n5
241+
trie2.RootNode().RightChild().RightChild(), // n6
242+
trie2.RootNode().RightChild(), // n7
243+
trie2.RootNode(), // n8
244+
// unique nodes from trie3
245+
trie3.RootNode().LeftChild().LeftChild(), // n9
246+
trie3.RootNode().LeftChild(), // n10
247+
trie3.RootNode(), // n11
248+
249+
}
250+
251+
// Use visitedNodes to prevent revisiting shared sub-tries.
252+
visitedNodes := make(map[*node.Node]uint64)
253+
i := 0
254+
for _, trie := range f {
255+
for itr := flattener.NewUniqueNodeIterator(trie, visitedNodes); itr.Next(); {
256+
n := itr.Value()
257+
visitedNodes[n] = uint64(i)
258+
259+
require.True(t, i < len(expectedNodes))
260+
require.Equal(t, expectedNodes[i], n)
261+
i++
262+
}
263+
}
264+
require.Equal(t, i, len(expectedNodes))
265+
})
266+
}

0 commit comments

Comments
 (0)