Skip to content

Commit a7e5071

Browse files
mergify[bot]ValarDragonrobert-zaremba
authored
perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (backport #10026) (#10114)
* perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (#10026) (cherry picked from commit 28bf2c1) # Conflicts: # CHANGELOG.md # store/cachekv/store.go * Fix merge conflict * fix changelog Co-authored-by: Dev Ojha <[email protected]> Co-authored-by: ValarDragon <[email protected]> Co-authored-by: Robert Zaremba <[email protected]>
1 parent 2e871eb commit a7e5071

File tree

3 files changed

+82
-117
lines changed

3 files changed

+82
-117
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ Ref: https://keepachangelog.com/en/1.0.0/
3636

3737
## Unreleased
3838

39+
### Improvements
40+
41+
* (store) [\#10026](https://github.com/cosmos/cosmos-sdk/pull/10026) Improve CacheKVStore datastructures / algorithms, to no longer take O(N^2) time when interleaving iterators and insertions.
42+
3943
### Bug Fixes
4044

4145
* [\#9969](https://github.com/cosmos/cosmos-sdk/pull/9969) fix: use keyring in config for add-genesis-account cmd.

store/cachekv/memiterator.go

+22-79
Original file line numberDiff line numberDiff line change
@@ -1,107 +1,50 @@
11
package cachekv
22

33
import (
4-
"errors"
5-
64
dbm "github.com/tendermint/tm-db"
75

8-
"github.com/cosmos/cosmos-sdk/types/kv"
6+
"github.com/cosmos/cosmos-sdk/store/types"
97
)
108

119
// Iterates over iterKVCache items.
1210
// if key is nil, means it was deleted.
1311
// Implements Iterator.
1412
type memIterator struct {
15-
start, end []byte
16-
items []*kv.Pair
17-
ascending bool
18-
}
19-
20-
func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator {
21-
itemsInDomain := make([]*kv.Pair, 0, items.Len())
22-
23-
var entered bool
24-
25-
for e := items.Front(); e != nil; e = e.Next() {
26-
item := e.Value
27-
if !dbm.IsKeyInDomain(item.Key, start, end) {
28-
if entered {
29-
break
30-
}
31-
32-
continue
33-
}
34-
35-
itemsInDomain = append(itemsInDomain, item)
36-
entered = true
37-
}
13+
types.Iterator
3814

39-
return &memIterator{
40-
start: start,
41-
end: end,
42-
items: itemsInDomain,
43-
ascending: ascending,
44-
}
15+
deleted map[string]struct{}
4516
}
4617

47-
func (mi *memIterator) Domain() ([]byte, []byte) {
48-
return mi.start, mi.end
49-
}
18+
func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]struct{}, ascending bool) *memIterator {
19+
var iter types.Iterator
20+
var err error
5021

51-
func (mi *memIterator) Valid() bool {
52-
return len(mi.items) > 0
53-
}
22+
if ascending {
23+
iter, err = items.Iterator(start, end)
24+
} else {
25+
iter, err = items.ReverseIterator(start, end)
26+
}
5427

55-
func (mi *memIterator) assertValid() {
56-
if err := mi.Error(); err != nil {
28+
if err != nil {
5729
panic(err)
5830
}
59-
}
6031

61-
func (mi *memIterator) Next() {
62-
mi.assertValid()
63-
64-
if mi.ascending {
65-
mi.items = mi.items[1:]
66-
} else {
67-
mi.items = mi.items[:len(mi.items)-1]
32+
newDeleted := make(map[string]struct{})
33+
for k, v := range deleted {
34+
newDeleted[k] = v
6835
}
69-
}
7036

71-
func (mi *memIterator) Key() []byte {
72-
mi.assertValid()
37+
return &memIterator{
38+
Iterator: iter,
7339

74-
if mi.ascending {
75-
return mi.items[0].Key
40+
deleted: newDeleted,
7641
}
77-
78-
return mi.items[len(mi.items)-1].Key
7942
}
8043

8144
func (mi *memIterator) Value() []byte {
82-
mi.assertValid()
83-
84-
if mi.ascending {
85-
return mi.items[0].Value
86-
}
87-
88-
return mi.items[len(mi.items)-1].Value
89-
}
90-
91-
func (mi *memIterator) Close() error {
92-
mi.start = nil
93-
mi.end = nil
94-
mi.items = nil
95-
96-
return nil
97-
}
98-
99-
// Error returns an error if the memIterator is invalid defined by the Valid
100-
// method.
101-
func (mi *memIterator) Error() error {
102-
if !mi.Valid() {
103-
return errors.New("invalid memIterator")
45+
key := mi.Iterator.Key()
46+
if _, ok := mi.deleted[string(key)]; ok {
47+
return nil
10448
}
105-
106-
return nil
49+
return mi.Iterator.Value()
10750
}

store/cachekv/store.go

+56-38
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,17 @@ import (
2020
// If value is nil but deleted is false, it means the parent doesn't have the
2121
// key. (No need to delete upon Write())
2222
type cValue struct {
23-
value []byte
24-
deleted bool
25-
dirty bool
23+
value []byte
24+
dirty bool
2625
}
2726

2827
// Store wraps an in-memory cache around an underlying types.KVStore.
2928
type Store struct {
3029
mtx sync.Mutex
3130
cache map[string]*cValue
31+
deleted map[string]struct{}
3232
unsortedCache map[string]struct{}
33-
sortedCache *kv.List // always ascending sorted
33+
sortedCache *dbm.MemDB // always ascending sorted
3434
parent types.KVStore
3535
}
3636

@@ -40,8 +40,9 @@ var _ types.CacheKVStore = (*Store)(nil)
4040
func NewStore(parent types.KVStore) *Store {
4141
return &Store{
4242
cache: make(map[string]*cValue),
43+
deleted: make(map[string]struct{}),
4344
unsortedCache: make(map[string]struct{}),
44-
sortedCache: kv.NewList(),
45+
sortedCache: dbm.NewMemDB(),
4546
parent: parent,
4647
}
4748
}
@@ -120,7 +121,7 @@ func (store *Store) Write() {
120121
cacheValue := store.cache[key]
121122

122123
switch {
123-
case cacheValue.deleted:
124+
case store.isDeleted(key):
124125
store.parent.Delete([]byte(key))
125126
case cacheValue.value == nil:
126127
// Skip, it already doesn't exist in parent.
@@ -131,8 +132,9 @@ func (store *Store) Write() {
131132

132133
// Clear the cache
133134
store.cache = make(map[string]*cValue)
135+
store.deleted = make(map[string]struct{})
134136
store.unsortedCache = make(map[string]struct{})
135-
store.sortedCache = kv.NewList()
137+
store.sortedCache = dbm.NewMemDB()
136138
}
137139

138140
// CacheWrap implements CacheWrapper.
@@ -171,7 +173,7 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
171173
}
172174

173175
store.dirtyItems(start, end)
174-
cache = newMemIterator(start, end, store.sortedCache, ascending)
176+
cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending)
175177

176178
return newCacheMergeIterator(parent, cache, ascending)
177179
}
@@ -180,7 +182,7 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
180182
// from string -> []byte to speed up operations, it is not meant
181183
// to be used generally, but for a specific pattern to check for available
182184
// keys within a domain.
183-
func strToByte(s string) []byte {
185+
func strToBytes(s string) []byte {
184186
var b []byte
185187
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
186188
hdr.Cap = len(s)
@@ -200,16 +202,33 @@ func byteSliceToStr(b []byte) string {
200202

201203
// Constructs a slice of dirty items, to use w/ memIterator.
202204
func (store *Store) dirtyItems(start, end []byte) {
203-
unsorted := make([]*kv.Pair, 0)
204-
205205
n := len(store.unsortedCache)
206-
for key := range store.unsortedCache {
207-
if dbm.IsKeyInDomain(strToByte(key), start, end) {
206+
unsorted := make([]*kv.Pair, 0)
207+
// If the unsortedCache is too big, its costs too much to determine
208+
// whats in the subset we are concerned about.
209+
// If you are interleaving iterator calls with writes, this can easily become an
210+
// O(N^2) overhead.
211+
// Even without that, too many range checks eventually becomes more expensive
212+
// than just not having the cache.
213+
if n >= 1024 {
214+
for key := range store.unsortedCache {
208215
cacheValue := store.cache[key]
209216
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
210217
}
218+
} else {
219+
// else do a linear scan to determine if the unsorted pairs are in the pool.
220+
for key := range store.unsortedCache {
221+
if dbm.IsKeyInDomain(strToBytes(key), start, end) {
222+
cacheValue := store.cache[key]
223+
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
224+
}
225+
}
211226
}
227+
store.clearUnsortedCacheSubset(unsorted)
228+
}
212229

230+
func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
231+
n := len(store.unsortedCache)
213232
if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
214233
for key := range store.unsortedCache {
215234
delete(store.unsortedCache, key)
@@ -219,32 +238,21 @@ func (store *Store) dirtyItems(start, end []byte) {
219238
delete(store.unsortedCache, byteSliceToStr(kv.Key))
220239
}
221240
}
222-
223241
sort.Slice(unsorted, func(i, j int) bool {
224242
return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
225243
})
226244

227-
for e := store.sortedCache.Front(); e != nil && len(unsorted) != 0; {
228-
uitem := unsorted[0]
229-
sitem := e.Value
230-
comp := bytes.Compare(uitem.Key, sitem.Key)
231-
232-
switch comp {
233-
case -1:
234-
unsorted = unsorted[1:]
235-
236-
store.sortedCache.InsertBefore(uitem, e)
237-
case 1:
238-
e = e.Next()
239-
case 0:
240-
unsorted = unsorted[1:]
241-
e.Value = uitem
242-
e = e.Next()
245+
for _, item := range unsorted {
246+
if item.Value == nil {
247+
// deleted element, tracked by store.deleted
248+
// setting arbitrary value
249+
store.sortedCache.Set(item.Key, []byte{})
250+
continue
251+
}
252+
err := store.sortedCache.Set(item.Key, item.Value)
253+
if err != nil {
254+
panic(err)
243255
}
244-
}
245-
246-
for _, kvp := range unsorted {
247-
store.sortedCache.PushBack(kvp)
248256
}
249257
}
250258

@@ -253,12 +261,22 @@ func (store *Store) dirtyItems(start, end []byte) {
253261

254262
// Only entrypoint to mutate store.cache.
255263
func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
256-
store.cache[string(key)] = &cValue{
257-
value: value,
258-
deleted: deleted,
259-
dirty: dirty,
264+
keyStr := byteSliceToStr(key)
265+
store.cache[keyStr] = &cValue{
266+
value: value,
267+
dirty: dirty,
268+
}
269+
if deleted {
270+
store.deleted[keyStr] = struct{}{}
271+
} else {
272+
delete(store.deleted, keyStr)
260273
}
261274
if dirty {
262275
store.unsortedCache[string(key)] = struct{}{}
263276
}
264277
}
278+
279+
func (store *Store) isDeleted(key string) bool {
280+
_, ok := store.deleted[key]
281+
return ok
282+
}

0 commit comments

Comments
 (0)