From 70b915e5d57e345d14086bb703cc1370e47aec8c Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Fri, 21 Jan 2022 17:58:14 +0400 Subject: [PATCH 01/18] blockstore on all dagstore cids --- go.mod | 2 + interface.go | 3 ++ readonly_bs.go | 133 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 readonly_bs.go diff --git a/go.mod b/go.mod index e526af7..475aeaf 100644 --- a/go.mod +++ b/go.mod @@ -4,9 +4,11 @@ go 1.16 require ( github.com/filecoin-project/go-indexer-core v0.2.4 + github.com/hashicorp/golang-lru v0.5.4 github.com/ipfs/go-block-format v0.0.3 github.com/ipfs/go-cid v0.1.0 github.com/ipfs/go-datastore v0.5.0 + github.com/ipfs/go-ipfs-blockstore v1.1.2 github.com/ipfs/go-log/v2 v2.3.0 github.com/ipld/go-car/v2 v2.1.1 github.com/libp2p/go-libp2p-core v0.9.0 diff --git a/interface.go b/interface.go index 83f2b1b..43f2101 100644 --- a/interface.go +++ b/interface.go @@ -3,6 +3,8 @@ package dagstore import ( "context" + blockstore "github.com/ipfs/go-ipfs-blockstore" + carindex "github.com/ipld/go-car/v2/index" mh "github.com/multiformats/go-multihash" @@ -24,4 +26,5 @@ type Interface interface { ShardsContainingMultihash(h mh.Multihash) ([]shard.Key, error) GC(ctx context.Context) (*GCResult, error) Close() error + AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) } diff --git a/readonly_bs.go b/readonly_bs.go new file mode 100644 index 0000000..24d07b6 --- /dev/null +++ b/readonly_bs.go @@ -0,0 +1,133 @@ +package dagstore + +import ( + "context" + "errors" + "fmt" + + "github.com/filecoin-project/dagstore/shard" + lru "github.com/hashicorp/golang-lru" + + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + blockstore "github.com/ipfs/go-ipfs-blockstore" +) + +var _ blockstore.Blockstore = (*AllShardsReadBlockstore)(nil) + +type ShardSelectorF func(c cid.Cid, shards []shard.Key) (shard.Key, error) + +type AllShardsReadBlockstore struct { + d *DAGStore + shardSelectF ShardSelectorF + + bsCache *lru.ARCCache // thread-safe +} + +func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { + lru, err := lru.NewARC(maxCacheSize) + if err != nil { + return nil, fmt.Errorf("failed to create lru cache for read only blockstores") + } + + return &AllShardsReadBlockstore{ + d: d, + shardSelectF: shardSelector, + bsCache: lru, + }, nil + +} + +func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.Block, error) { // get all the shards containing the mh + shards, err := ro.d.ShardsContainingMultihash(c.Hash()) + if err != nil { + return nil, fmt.Errorf("failed to fetch shards containing the block: %w", err) + } + + // do we have a cached blockstore for a shard containing the given cid ? If yes, serve the block from that cid + for _, sk := range shards { + // a valid cache hit here updates the priority of the shard's blockstore in the LRU cache. + val, ok := ro.bsCache.Get(sk) + if !ok { + continue + } + + rbs := val.(ReadBlockstore) + blk, err := rbs.Get(ctx, c) + if err != nil { + ro.bsCache.Remove(sk) + continue + } + return blk, nil + } + + // ---- we don't have a cached blockstore for a shard that can serve the block -> let's build one. + + // select a valid shard that can serve the retrieval + sk, err := ro.shardSelectF(c, shards) + if err != nil { + return nil, fmt.Errorf("failed to select a shard: %w", err) + } + + // load blockstore for the given shard + resch := make(chan ShardResult, 1) + // TODO Optmize index deserialisation in memory to reduce the memory footprint of the cache + if err := ro.d.AcquireShard(ctx, sk, resch, AcquireOpts{}); err != nil { + return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) + } + var res ShardResult + select { + case <-ctx.Done(): + return nil, ctx.Err() + case res = <-resch: + if res.Error != nil { + return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, res.Error) + } + } + + bs, err := res.Accessor.Blockstore() + if err != nil { + return nil, fmt.Errorf("failed top load read only blockstore for shard %s: %w", sk, err) + } + + // update lru cache + ro.bsCache.Add(sk, bs) + + return bs.Get(ctx, c) +} + +func (ro *AllShardsReadBlockstore) Has(_ context.Context, c cid.Cid) (bool, error) { + shards, err := ro.d.ShardsContainingMultihash(c.Hash()) + if err != nil { + return false, fmt.Errorf("failed to fetch shards containing the multihash %w", err) + } + + // if there is a shard we can serve the retrieval from, we have the requested cid. + _, err = ro.shardSelectF(c, shards) + if err != nil { + return false, fmt.Errorf("failed to select a shard: %w", err) + } + + return true, nil +} + +func (ro *AllShardsReadBlockstore) HashOnRead(_ bool) { + panic(errors.New("unsupported operation HashOnRead")) +} + +// GetSize returns the CIDs mapped BlockSize +func (ro *AllShardsReadBlockstore) GetSize(context.Context, cid.Cid) (int, error) { + return 100000000000, nil +} +func (ro *AllShardsReadBlockstore) DeleteBlock(context.Context, cid.Cid) error { + return errors.New("unsupported operation DeleteBlock") +} +func (ro *AllShardsReadBlockstore) Put(context.Context, blocks.Block) error { + return errors.New("unsupported operation Put") +} +func (ro *AllShardsReadBlockstore) PutMany(context.Context, []blocks.Block) error { + return errors.New("unsupported operation PutMany") +} +func (ro *AllShardsReadBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { + return nil, errors.New("unsupported operation AllKeysChan") +} From 72f676fe8f38cb8f671baf54da16078ba362ee62 Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Fri, 21 Jan 2022 18:49:31 +0400 Subject: [PATCH 02/18] double caching --- interface.go | 2 +- readonly_bs.go | 55 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/interface.go b/interface.go index 43f2101..4fe4ae4 100644 --- a/interface.go +++ b/interface.go @@ -26,5 +26,5 @@ type Interface interface { ShardsContainingMultihash(h mh.Multihash) ([]shard.Key, error) GC(ctx context.Context) (*GCResult, error) Close() error - AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) + AllShardsReadBlockstore(shardSelector ShardSelectorF, maxBSCachesize int, maxBlkCachesize int) (blockstore.Blockstore, error) } diff --git a/readonly_bs.go b/readonly_bs.go index 24d07b6..1ebb56c 100644 --- a/readonly_bs.go +++ b/readonly_bs.go @@ -21,30 +21,51 @@ type AllShardsReadBlockstore struct { d *DAGStore shardSelectF ShardSelectorF - bsCache *lru.ARCCache // thread-safe + // caches the carV1 payload stream and the carv2 index for shard read affinity i.e. further reads will likely be from the same shard. + // shard key -> read only blockstore (CARV1 stream + CARv2 Index) + bsCache *lru.ARCCache + + // caches the blocks themselves -> can be scaled by using a redis/memcache etc distributed cache + // multihash -> block + blkCache *lru.ARCCache } -func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { - lru, err := lru.NewARC(maxCacheSize) +func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCacheSize int, maxBlocks int) (blockstore.Blockstore, error) { + bslru, err := lru.NewARC(maxCacheSize) if err != nil { return nil, fmt.Errorf("failed to create lru cache for read only blockstores") } + blkLru, err := lru.NewARC(maxBlocks) + if err != nil { + return nil, fmt.Errorf("failed to create lru cache for blocks: %w", err) + } return &AllShardsReadBlockstore{ d: d, shardSelectF: shardSelector, - bsCache: lru, + bsCache: bslru, + blkCache: blkLru, }, nil } func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.Block, error) { // get all the shards containing the mh - shards, err := ro.d.ShardsContainingMultihash(c.Hash()) + mhash := c.Hash() + // do we have the block cached ? + if val, ok := ro.blkCache.Get(mhash); ok { + return val.(blocks.Block), nil + } + + // fetch all the shards containing the multihash + shards, err := ro.d.ShardsContainingMultihash(mhash) if err != nil { return nil, fmt.Errorf("failed to fetch shards containing the block: %w", err) } + if len(shards) == 0 { + return nil, errors.New("no shards contain the requested block") + } - // do we have a cached blockstore for a shard containing the given cid ? If yes, serve the block from that cid + // do we have a cached blockstore for a shard containing the required block ? If yes, serve the block from that shard for _, sk := range shards { // a valid cache hit here updates the priority of the shard's blockstore in the LRU cache. val, ok := ro.bsCache.Get(sk) @@ -58,6 +79,9 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B ro.bsCache.Remove(sk) continue } + + // add the block to the block cache + ro.blkCache.Add(mhash, blk) return blk, nil } @@ -90,10 +114,16 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B return nil, fmt.Errorf("failed top load read only blockstore for shard %s: %w", sk, err) } - // update lru cache + blk, err := bs.Get(ctx, c) + if err != nil { + return nil, fmt.Errorf("failed to get block: %w", err) + } + + // update lru caches ro.bsCache.Add(sk, bs) + ro.blkCache.Add(mhash, blk) - return bs.Get(ctx, c) + return blk, nil } func (ro *AllShardsReadBlockstore) Has(_ context.Context, c cid.Cid) (bool, error) { @@ -116,8 +146,13 @@ func (ro *AllShardsReadBlockstore) HashOnRead(_ bool) { } // GetSize returns the CIDs mapped BlockSize -func (ro *AllShardsReadBlockstore) GetSize(context.Context, cid.Cid) (int, error) { - return 100000000000, nil +func (ro *AllShardsReadBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) { + blk, err := ro.Get(ctx, c) + if err != nil { + return 0, fmt.Errorf("failed to get block: %w", err) + } + + return len(blk.RawData()), nil } func (ro *AllShardsReadBlockstore) DeleteBlock(context.Context, cid.Cid) error { return errors.New("unsupported operation DeleteBlock") From 38d0ceb4f135caaa16ae29d133ff285cb61f10b7 Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Fri, 21 Jan 2022 19:28:34 +0400 Subject: [PATCH 03/18] key by mh --- readonly_bs.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readonly_bs.go b/readonly_bs.go index 1ebb56c..59dad7c 100644 --- a/readonly_bs.go +++ b/readonly_bs.go @@ -52,7 +52,7 @@ func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCach func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.Block, error) { // get all the shards containing the mh mhash := c.Hash() // do we have the block cached ? - if val, ok := ro.blkCache.Get(mhash); ok { + if val, ok := ro.blkCache.Get(mhash.String()); ok { return val.(blocks.Block), nil } @@ -81,7 +81,7 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B } // add the block to the block cache - ro.blkCache.Add(mhash, blk) + ro.blkCache.Add(mhash.String(), blk) return blk, nil } @@ -121,7 +121,7 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B // update lru caches ro.bsCache.Add(sk, bs) - ro.blkCache.Add(mhash, blk) + ro.blkCache.Add(mhash.String(), blk) return blk, nil } From bc3cae8199f9576f388948e3788389d3b037072a Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Thu, 27 Jan 2022 16:27:32 +0400 Subject: [PATCH 04/18] ensure we close shard accessors --- readonly_bs.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/readonly_bs.go b/readonly_bs.go index 59dad7c..88138f4 100644 --- a/readonly_bs.go +++ b/readonly_bs.go @@ -17,13 +17,18 @@ var _ blockstore.Blockstore = (*AllShardsReadBlockstore)(nil) type ShardSelectorF func(c cid.Cid, shards []shard.Key) (shard.Key, error) +type accessorWithBlockstore struct { + sa *ShardAccessor + bs ReadBlockstore +} + type AllShardsReadBlockstore struct { d *DAGStore shardSelectF ShardSelectorF // caches the carV1 payload stream and the carv2 index for shard read affinity i.e. further reads will likely be from the same shard. // shard key -> read only blockstore (CARV1 stream + CARv2 Index) - bsCache *lru.ARCCache + bsCache *lru.Cache // caches the blocks themselves -> can be scaled by using a redis/memcache etc distributed cache // multihash -> block @@ -31,7 +36,10 @@ type AllShardsReadBlockstore struct { } func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCacheSize int, maxBlocks int) (blockstore.Blockstore, error) { - bslru, err := lru.NewARC(maxCacheSize) + bslru, err := lru.NewWithEvict(maxCacheSize, func(_ interface{}, val interface{}) { + abs := val.(*accessorWithBlockstore) + abs.sa.Close() + }) if err != nil { return nil, fmt.Errorf("failed to create lru cache for read only blockstores") } @@ -73,7 +81,7 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B continue } - rbs := val.(ReadBlockstore) + rbs := val.(*accessorWithBlockstore).bs blk, err := rbs.Get(ctx, c) if err != nil { ro.bsCache.Remove(sk) @@ -109,7 +117,8 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B } } - bs, err := res.Accessor.Blockstore() + sa := res.Accessor + bs, err := sa.Blockstore() if err != nil { return nil, fmt.Errorf("failed top load read only blockstore for shard %s: %w", sk, err) } @@ -120,7 +129,7 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B } // update lru caches - ro.bsCache.Add(sk, bs) + ro.bsCache.Add(sk, &accessorWithBlockstore{sa, bs}) ro.blkCache.Add(mhash.String(), blk) return blk, nil From 15ec8e35f3ac0a7d52dfef1586e772701f989cc7 Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Thu, 27 Jan 2022 20:55:31 +0400 Subject: [PATCH 05/18] better logging --- readonly_bs.go | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/readonly_bs.go b/readonly_bs.go index 88138f4..c768288 100644 --- a/readonly_bs.go +++ b/readonly_bs.go @@ -5,6 +5,8 @@ import ( "errors" "fmt" + logging "github.com/ipfs/go-log/v2" + "github.com/filecoin-project/dagstore/shard" lru "github.com/hashicorp/golang-lru" @@ -13,6 +15,8 @@ import ( blockstore "github.com/ipfs/go-ipfs-blockstore" ) +var logbs = logging.Logger("dagstore_all_bs") + var _ blockstore.Blockstore = (*AllShardsReadBlockstore)(nil) type ShardSelectorF func(c cid.Cid, shards []shard.Key) (shard.Key, error) @@ -57,10 +61,18 @@ func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCach } -func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.Block, error) { // get all the shards containing the mh +func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.Block, finalErr error) { // get all the shards containing the mh + logbs.Infow("bitswap Get", "cid", c) + defer func() { + if finalErr != nil { + logbs.Errorw("bitswap Get: got error", "cid", c, "error", finalErr) + } + }() + mhash := c.Hash() // do we have the block cached ? if val, ok := ro.blkCache.Get(mhash.String()); ok { + logbs.Infow("bitswap Get: returning from block cache", "cid", c) return val.(blocks.Block), nil } @@ -89,6 +101,7 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B } // add the block to the block cache + logbs.Infow("bitswap Get: returning from block store cache", "cid", c) ro.blkCache.Add(mhash.String(), blk) return blk, nil } @@ -132,21 +145,31 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.B ro.bsCache.Add(sk, &accessorWithBlockstore{sa, bs}) ro.blkCache.Add(mhash.String(), blk) + logbs.Infow("bitswap Get: returning after creating new blockstore", "cid", c) return blk, nil } func (ro *AllShardsReadBlockstore) Has(_ context.Context, c cid.Cid) (bool, error) { + logbs.Infow("bitswap Has", "cid", c) + shards, err := ro.d.ShardsContainingMultihash(c.Hash()) if err != nil { + logbs.Errorw("bitswap Has", "cid", c, "error", err) return false, fmt.Errorf("failed to fetch shards containing the multihash %w", err) } + if len(shards) == 0 { + logbs.Infow("bitswap Has: returning false no error", "cid", c) + return false, nil + } // if there is a shard we can serve the retrieval from, we have the requested cid. _, err = ro.shardSelectF(c, shards) if err != nil { + logbs.Errorw("bitswap Has", "cid", c, "err", err) return false, fmt.Errorf("failed to select a shard: %w", err) } + logbs.Infow("bitswap Has: returning true", "cid", c) return true, nil } @@ -156,11 +179,15 @@ func (ro *AllShardsReadBlockstore) HashOnRead(_ bool) { // GetSize returns the CIDs mapped BlockSize func (ro *AllShardsReadBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) { + logbs.Infow("bitswap GetSize", "cid", c) + blk, err := ro.Get(ctx, c) if err != nil { + logbs.Errorw("bitswap GetSize error", "cid", c, "error", err) return 0, fmt.Errorf("failed to get block: %w", err) } + logbs.Infow("bitswap GetSize success", "cid", c) return len(blk.RawData()), nil } func (ro *AllShardsReadBlockstore) DeleteBlock(context.Context, cid.Cid) error { From bdfb1ecc01d8e5c1916749c66e293832a3ba0c5b Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Tue, 29 Mar 2022 10:54:34 +0530 Subject: [PATCH 06/18] ready for review --- go.mod | 4 +- readonly_bs.go | 93 +++++++++++++++++++++++---------------------- readonly_bs_test.go | 90 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 47 deletions(-) create mode 100644 readonly_bs_test.go diff --git a/go.mod b/go.mod index be3fc73..0a4aefd 100644 --- a/go.mod +++ b/go.mod @@ -4,11 +4,11 @@ go 1.16 require ( github.com/hashicorp/golang-lru v0.5.4 - github.com/ipfs/go-ipfs-blockstore v1.1.2 github.com/ipfs/go-block-format v0.0.3 github.com/ipfs/go-cid v0.1.0 github.com/ipfs/go-datastore v0.5.0 github.com/ipfs/go-ds-leveldb v0.5.0 + github.com/ipfs/go-ipfs-blockstore v1.1.2 github.com/ipfs/go-ipfs-blocksutil v0.0.1 github.com/ipfs/go-log/v2 v2.3.0 github.com/ipld/go-car/v2 v2.1.1 @@ -22,4 +22,4 @@ require ( golang.org/x/exp v0.0.0-20210714144626-1041f73d31d8 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 -) \ No newline at end of file +) diff --git a/readonly_bs.go b/readonly_bs.go index c768288..205cc3d 100644 --- a/readonly_bs.go +++ b/readonly_bs.go @@ -15,10 +15,11 @@ import ( blockstore "github.com/ipfs/go-ipfs-blockstore" ) -var logbs = logging.Logger("dagstore_all_bs") +var logbs = logging.Logger("dagstore-all-readblockstore") var _ blockstore.Blockstore = (*AllShardsReadBlockstore)(nil) +// ShardSelectorF helps select a shard to fetch a cid from if the given cid is present in multiple shards. type ShardSelectorF func(c cid.Cid, shards []shard.Key) (shard.Key, error) type accessorWithBlockstore struct { @@ -26,58 +27,62 @@ type accessorWithBlockstore struct { bs ReadBlockstore } +// AllShardsReadBlockstore is a read only blockstore over all cids across all shards in the dagstore. type AllShardsReadBlockstore struct { d *DAGStore shardSelectF ShardSelectorF - // caches the carV1 payload stream and the carv2 index for shard read affinity i.e. further reads will likely be from the same shard. - // shard key -> read only blockstore (CARV1 stream + CARv2 Index) - bsCache *lru.Cache + // caches the blockstore for a given shard for shard read affinity i.e. further reads will likely be from the same shard. + // shard key -> read only blockstore + blockstoreCache *lru.Cache - // caches the blocks themselves -> can be scaled by using a redis/memcache etc distributed cache + // caches the blocks themselves -> can be scaled by using a redis/memcache etc distributed cache. // multihash -> block - blkCache *lru.ARCCache + blockCache *lru.Cache } func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCacheSize int, maxBlocks int) (blockstore.Blockstore, error) { + // instantiate the blockstore cache bslru, err := lru.NewWithEvict(maxCacheSize, func(_ interface{}, val interface{}) { + // ensure we close the blockstore for a shard when it's evicted from the cache so dagstore can gc it. abs := val.(*accessorWithBlockstore) abs.sa.Close() }) if err != nil { return nil, fmt.Errorf("failed to create lru cache for read only blockstores") } - blkLru, err := lru.NewARC(maxBlocks) + + // instantiate the block cache + blkLru, err := lru.New(maxBlocks) if err != nil { return nil, fmt.Errorf("failed to create lru cache for blocks: %w", err) } return &AllShardsReadBlockstore{ - d: d, - shardSelectF: shardSelector, - bsCache: bslru, - blkCache: blkLru, + d: d, + shardSelectF: shardSelector, + blockstoreCache: bslru, + blockCache: blkLru, }, nil - } -func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.Block, finalErr error) { // get all the shards containing the mh - logbs.Infow("bitswap Get", "cid", c) +func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.Block, finalErr error) { + logbs.Debugw("bitswap Get called", "cid", c) defer func() { if finalErr != nil { - logbs.Errorw("bitswap Get: got error", "cid", c, "error", finalErr) + logbs.Debugw("bitswap Get: got error", "cid", c, "error", finalErr) } }() mhash := c.Hash() // do we have the block cached ? - if val, ok := ro.blkCache.Get(mhash.String()); ok { - logbs.Infow("bitswap Get: returning from block cache", "cid", c) + if val, ok := ro.blockCache.Get(mhash.String()); ok { + logbs.Debugw("bitswap Get: returning from block cache", "cid", c) return val.(blocks.Block), nil } // fetch all the shards containing the multihash - shards, err := ro.d.ShardsContainingMultihash(mhash) + shards, err := ro.d.ShardsContainingMultihash(ctx, mhash) if err != nil { return nil, fmt.Errorf("failed to fetch shards containing the block: %w", err) } @@ -85,10 +90,10 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks return nil, errors.New("no shards contain the requested block") } - // do we have a cached blockstore for a shard containing the required block ? If yes, serve the block from that shard + // do we have a cached blockstore for a shard containing the required block ? If yes, serve the block from that blockstore for _, sk := range shards { // a valid cache hit here updates the priority of the shard's blockstore in the LRU cache. - val, ok := ro.bsCache.Get(sk) + val, ok := ro.blockstoreCache.Get(sk) if !ok { continue } @@ -96,13 +101,13 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks rbs := val.(*accessorWithBlockstore).bs blk, err := rbs.Get(ctx, c) if err != nil { - ro.bsCache.Remove(sk) + ro.blockstoreCache.Remove(sk) continue } // add the block to the block cache - logbs.Infow("bitswap Get: returning from block store cache", "cid", c) - ro.blkCache.Add(mhash.String(), blk) + logbs.Debugw("bitswap Get: returning from block store cache", "cid", c) + ro.blockCache.Add(mhash.String(), blk) return blk, nil } @@ -114,9 +119,8 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks return nil, fmt.Errorf("failed to select a shard: %w", err) } - // load blockstore for the given shard + // load blockstore for the selected shard and tru to serve the cid from that blockstore. resch := make(chan ShardResult, 1) - // TODO Optmize index deserialisation in memory to reduce the memory footprint of the cache if err := ro.d.AcquireShard(ctx, sk, resch, AcquireOpts{}); err != nil { return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) } @@ -141,58 +145,57 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks return nil, fmt.Errorf("failed to get block: %w", err) } - // update lru caches - ro.bsCache.Add(sk, &accessorWithBlockstore{sa, bs}) - ro.blkCache.Add(mhash.String(), blk) + // update the block cache and the blockstore cache + ro.blockstoreCache.Add(sk, &accessorWithBlockstore{sa, bs}) + ro.blockCache.Add(mhash.String(), blk) - logbs.Infow("bitswap Get: returning after creating new blockstore", "cid", c) + logbs.Debugw("bitswap Get: returning after creating new blockstore", "cid", c) return blk, nil } -func (ro *AllShardsReadBlockstore) Has(_ context.Context, c cid.Cid) (bool, error) { - logbs.Infow("bitswap Has", "cid", c) +func (ro *AllShardsReadBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) { + logbs.Debugw("bitswap Has called", "cid", c) - shards, err := ro.d.ShardsContainingMultihash(c.Hash()) + // if there is a shard that can serve the retrieval for the given cid, we have the requested cid + // and has should return true. + shards, err := ro.d.ShardsContainingMultihash(ctx, c.Hash()) if err != nil { - logbs.Errorw("bitswap Has", "cid", c, "error", err) + logbs.Debugw("bitswap Has error", "cid", c, "err", err) return false, fmt.Errorf("failed to fetch shards containing the multihash %w", err) } if len(shards) == 0 { - logbs.Infow("bitswap Has: returning false no error", "cid", c) + logbs.Debugw("bitswap Has: returning false no error", "cid", c) return false, nil } - // if there is a shard we can serve the retrieval from, we have the requested cid. _, err = ro.shardSelectF(c, shards) if err != nil { - logbs.Errorw("bitswap Has", "cid", c, "err", err) + logbs.Debugw("bitswap Has error", "cid", c, "err", err) return false, fmt.Errorf("failed to select a shard: %w", err) } - logbs.Infow("bitswap Has: returning true", "cid", c) + logbs.Debugw("bitswap Has: returning true", "cid", c) return true, nil } -func (ro *AllShardsReadBlockstore) HashOnRead(_ bool) { - panic(errors.New("unsupported operation HashOnRead")) -} - -// GetSize returns the CIDs mapped BlockSize func (ro *AllShardsReadBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) { - logbs.Infow("bitswap GetSize", "cid", c) + logbs.Debugw("bitswap GetSize called", "cid", c) blk, err := ro.Get(ctx, c) if err != nil { - logbs.Errorw("bitswap GetSize error", "cid", c, "error", err) + logbs.Debugw("bitswap GetSize error", "cid", c, "err", err) return 0, fmt.Errorf("failed to get block: %w", err) } - logbs.Infow("bitswap GetSize success", "cid", c) + logbs.Debugw("bitswap GetSize success", "cid", c) return len(blk.RawData()), nil } + +// --- UNSUPPORTED BLOCKSTORE METHODS ------- func (ro *AllShardsReadBlockstore) DeleteBlock(context.Context, cid.Cid) error { return errors.New("unsupported operation DeleteBlock") } +func (ro *AllShardsReadBlockstore) HashOnRead(_ bool) {} func (ro *AllShardsReadBlockstore) Put(context.Context, blocks.Block) error { return errors.New("unsupported operation Put") } diff --git a/readonly_bs_test.go b/readonly_bs_test.go new file mode 100644 index 0000000..69a78e3 --- /dev/null +++ b/readonly_bs_test.go @@ -0,0 +1,90 @@ +package dagstore + +import ( + "context" + "errors" + "testing" + + "github.com/multiformats/go-multihash" + + "github.com/filecoin-project/dagstore/shard" + "github.com/ipfs/go-cid" + "github.com/ipfs/go-datastore" + dssync "github.com/ipfs/go-datastore/sync" + "github.com/stretchr/testify/require" +) + +var noOpSelector = func(c cid.Cid, shards []shard.Key) (shard.Key, error) { + return shards[0], nil +} + +func TestReadOnlyBs(t *testing.T) { + ctx := context.Background() + store := dssync.MutexWrap(datastore.NewMapDatastore()) + dagst, err := NewDAGStore(Config{ + MountRegistry: testRegistry(t), + TransientsDir: t.TempDir(), + Datastore: store, + }) + require.NoError(t, err) + + err = dagst.Start(context.Background()) + require.NoError(t, err) + + // two shards containing the same cid + keys := registerShards(t, dagst, 2, carv2mnt, RegisterOpts{}) + + rbs, err := dagst.AllShardsReadBlockstore(noOpSelector, 10, 10) + require.NoError(t, err) + + // iterate over the CARV2 Index for the given CARv2 file and ensure the readonly blockstore + // works for each of those cids + it, err := dagst.GetIterableIndex(keys[0]) + require.NoError(t, err) + + it.ForEach(func(mh multihash.Multihash, u uint64) error { + c := cid.NewCidV1(cid.Raw, mh) + + has, err := rbs.Has(ctx, c) + require.NoError(t, err) + require.True(t, has) + + blk, err := rbs.Get(ctx, c) + require.NoError(t, err) + require.NotEmpty(t, blk) + + sz, err := rbs.GetSize(ctx, c) + require.NoError(t, err) + require.EqualValues(t, len(blk.RawData()), sz) + + require.EqualValues(t, c, blk.Cid()) + return nil + }) + + // ------------------------------------------ + // Now test with a shard selector that rejects everything and ensure we always see errors + fss := func(c cid.Cid, shards []shard.Key) (shard.Key, error) { + return shard.Key{}, errors.New("rejected") + } + + rbs, err = dagst.AllShardsReadBlockstore(fss, 10, 10) + require.NoError(t, err) + it.ForEach(func(mh multihash.Multihash, u uint64) error { + c := cid.NewCidV1(cid.Raw, mh) + + has, err := rbs.Has(ctx, c) + require.Error(t, err) + require.False(t, has) + + blk, err := rbs.Get(ctx, c) + require.Error(t, err) + require.Empty(t, blk) + + sz, err := rbs.GetSize(ctx, c) + require.Error(t, err) + require.EqualValues(t, 0, sz) + + return nil + }) + +} From 388f51fadf00a3394671bfed9839f59a9c49c787 Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Tue, 29 Mar 2022 13:59:54 +0530 Subject: [PATCH 07/18] Apply suggestions from code review Co-authored-by: dirkmc --- readonly_bs.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readonly_bs.go b/readonly_bs.go index 205cc3d..42549bf 100644 --- a/readonly_bs.go +++ b/readonly_bs.go @@ -119,7 +119,7 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks return nil, fmt.Errorf("failed to select a shard: %w", err) } - // load blockstore for the selected shard and tru to serve the cid from that blockstore. + // load blockstore for the selected shard and try to serve the cid from that blockstore. resch := make(chan ShardResult, 1) if err := ro.d.AcquireShard(ctx, sk, resch, AcquireOpts{}); err != nil { return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) @@ -137,7 +137,7 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks sa := res.Accessor bs, err := sa.Blockstore() if err != nil { - return nil, fmt.Errorf("failed top load read only blockstore for shard %s: %w", sk, err) + return nil, fmt.Errorf("failed to load read only blockstore for shard %s: %w", sk, err) } blk, err := bs.Get(ctx, c) From 8d9286a428a423f85648493c514dd47dddad5d5f Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Tue, 29 Mar 2022 15:10:58 +0530 Subject: [PATCH 08/18] changes as per review --- readonly_bs.go => indexbs/indexbacked_bs.go | 99 ++++++++----------- .../indexbacked_bs_test.go | 33 +++++-- interface.go | 3 - 3 files changed, 69 insertions(+), 66 deletions(-) rename readonly_bs.go => indexbs/indexbacked_bs.go (57%) rename readonly_bs_test.go => indexbs/indexbacked_bs_test.go (66%) diff --git a/readonly_bs.go b/indexbs/indexbacked_bs.go similarity index 57% rename from readonly_bs.go rename to indexbs/indexbacked_bs.go index 42549bf..5a1e551 100644 --- a/readonly_bs.go +++ b/indexbs/indexbacked_bs.go @@ -1,47 +1,45 @@ -package dagstore +package indexbs import ( "context" "errors" "fmt" + "github.com/filecoin-project/dagstore" + blocks "github.com/ipfs/go-block-format" logging "github.com/ipfs/go-log/v2" "github.com/filecoin-project/dagstore/shard" lru "github.com/hashicorp/golang-lru" - - blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" blockstore "github.com/ipfs/go-ipfs-blockstore" ) var logbs = logging.Logger("dagstore-all-readblockstore") -var _ blockstore.Blockstore = (*AllShardsReadBlockstore)(nil) +var ErrBlockNotFound = errors.New("block not found") + +var _ blockstore.Blockstore = (*IndexBackedBlockstore)(nil) // ShardSelectorF helps select a shard to fetch a cid from if the given cid is present in multiple shards. type ShardSelectorF func(c cid.Cid, shards []shard.Key) (shard.Key, error) type accessorWithBlockstore struct { - sa *ShardAccessor - bs ReadBlockstore + sa *dagstore.ShardAccessor + bs dagstore.ReadBlockstore } -// AllShardsReadBlockstore is a read only blockstore over all cids across all shards in the dagstore. -type AllShardsReadBlockstore struct { - d *DAGStore +// IndexBackedBlockstore is a read only blockstore over all cids across all shards in the dagstore. +type IndexBackedBlockstore struct { + d *dagstore.DAGStore shardSelectF ShardSelectorF // caches the blockstore for a given shard for shard read affinity i.e. further reads will likely be from the same shard. // shard key -> read only blockstore blockstoreCache *lru.Cache - - // caches the blocks themselves -> can be scaled by using a redis/memcache etc distributed cache. - // multihash -> block - blockCache *lru.Cache } -func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCacheSize int, maxBlocks int) (blockstore.Blockstore, error) { +func NewIndexBackedBlockstore(d *dagstore.DAGStore, shardSelector ShardSelectorF, maxCacheSize int, maxBlocks int) (blockstore.Blockstore, error) { // instantiate the blockstore cache bslru, err := lru.NewWithEvict(maxCacheSize, func(_ interface{}, val interface{}) { // ensure we close the blockstore for a shard when it's evicted from the cache so dagstore can gc it. @@ -52,34 +50,22 @@ func (d *DAGStore) AllShardsReadBlockstore(shardSelector ShardSelectorF, maxCach return nil, fmt.Errorf("failed to create lru cache for read only blockstores") } - // instantiate the block cache - blkLru, err := lru.New(maxBlocks) - if err != nil { - return nil, fmt.Errorf("failed to create lru cache for blocks: %w", err) - } - - return &AllShardsReadBlockstore{ + return &IndexBackedBlockstore{ d: d, shardSelectF: shardSelector, blockstoreCache: bslru, - blockCache: blkLru, }, nil } -func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.Block, finalErr error) { - logbs.Debugw("bitswap Get called", "cid", c) +func (ro *IndexBackedBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.Block, finalErr error) { + logbs.Debugw("Get called", "cid", c) defer func() { if finalErr != nil { - logbs.Debugw("bitswap Get: got error", "cid", c, "error", finalErr) + logbs.Debugw("Get: got error", "cid", c, "error", finalErr) } }() mhash := c.Hash() - // do we have the block cached ? - if val, ok := ro.blockCache.Get(mhash.String()); ok { - logbs.Debugw("bitswap Get: returning from block cache", "cid", c) - return val.(blocks.Block), nil - } // fetch all the shards containing the multihash shards, err := ro.d.ShardsContainingMultihash(ctx, mhash) @@ -87,7 +73,7 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks return nil, fmt.Errorf("failed to fetch shards containing the block: %w", err) } if len(shards) == 0 { - return nil, errors.New("no shards contain the requested block") + return nil, ErrBlockNotFound } // do we have a cached blockstore for a shard containing the required block ? If yes, serve the block from that blockstore @@ -101,13 +87,15 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks rbs := val.(*accessorWithBlockstore).bs blk, err := rbs.Get(ctx, c) if err != nil { + // we know that the cid we want to lookup belongs to a shard with key `sk` and + // so if we fail to get the corresponding block from the blockstore for that shards, something has gone wrong + // and we should remove the blockstore for that shard from our cache. ro.blockstoreCache.Remove(sk) continue } // add the block to the block cache - logbs.Debugw("bitswap Get: returning from block store cache", "cid", c) - ro.blockCache.Add(mhash.String(), blk) + logbs.Debugw("Get: returning from block store cache", "cid", c) return blk, nil } @@ -116,15 +104,15 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks // select a valid shard that can serve the retrieval sk, err := ro.shardSelectF(c, shards) if err != nil { - return nil, fmt.Errorf("failed to select a shard: %w", err) + return nil, ErrBlockNotFound } // load blockstore for the selected shard and try to serve the cid from that blockstore. - resch := make(chan ShardResult, 1) - if err := ro.d.AcquireShard(ctx, sk, resch, AcquireOpts{}); err != nil { + resch := make(chan dagstore.ShardResult, 1) + if err := ro.d.AcquireShard(ctx, sk, resch, dagstore.AcquireOpts{}); err != nil { return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) } - var res ShardResult + var res dagstore.ShardResult select { case <-ctx.Done(): return nil, ctx.Err() @@ -147,61 +135,60 @@ func (ro *AllShardsReadBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks // update the block cache and the blockstore cache ro.blockstoreCache.Add(sk, &accessorWithBlockstore{sa, bs}) - ro.blockCache.Add(mhash.String(), blk) - logbs.Debugw("bitswap Get: returning after creating new blockstore", "cid", c) + logbs.Debugw("Get: returning after creating new blockstore", "cid", c) return blk, nil } -func (ro *AllShardsReadBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) { - logbs.Debugw("bitswap Has called", "cid", c) +func (ro *IndexBackedBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) { + logbs.Debugw("Has called", "cid", c) // if there is a shard that can serve the retrieval for the given cid, we have the requested cid // and has should return true. shards, err := ro.d.ShardsContainingMultihash(ctx, c.Hash()) if err != nil { - logbs.Debugw("bitswap Has error", "cid", c, "err", err) - return false, fmt.Errorf("failed to fetch shards containing the multihash %w", err) + logbs.Debugw("Has error", "cid", c, "err", err) + return false, nil } if len(shards) == 0 { - logbs.Debugw("bitswap Has: returning false no error", "cid", c) + logbs.Debugw("Has: returning false no error", "cid", c) return false, nil } _, err = ro.shardSelectF(c, shards) if err != nil { - logbs.Debugw("bitswap Has error", "cid", c, "err", err) - return false, fmt.Errorf("failed to select a shard: %w", err) + logbs.Debugw("Has error", "cid", c, "err", err) + return false, ErrBlockNotFound } - logbs.Debugw("bitswap Has: returning true", "cid", c) + logbs.Debugw("Has: returning true", "cid", c) return true, nil } -func (ro *AllShardsReadBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) { - logbs.Debugw("bitswap GetSize called", "cid", c) +func (ro *IndexBackedBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) { + logbs.Debugw("GetSize called", "cid", c) blk, err := ro.Get(ctx, c) if err != nil { - logbs.Debugw("bitswap GetSize error", "cid", c, "err", err) + logbs.Debugw("GetSize error", "cid", c, "err", err) return 0, fmt.Errorf("failed to get block: %w", err) } - logbs.Debugw("bitswap GetSize success", "cid", c) + logbs.Debugw("GetSize success", "cid", c) return len(blk.RawData()), nil } // --- UNSUPPORTED BLOCKSTORE METHODS ------- -func (ro *AllShardsReadBlockstore) DeleteBlock(context.Context, cid.Cid) error { +func (ro *IndexBackedBlockstore) DeleteBlock(context.Context, cid.Cid) error { return errors.New("unsupported operation DeleteBlock") } -func (ro *AllShardsReadBlockstore) HashOnRead(_ bool) {} -func (ro *AllShardsReadBlockstore) Put(context.Context, blocks.Block) error { +func (ro *IndexBackedBlockstore) HashOnRead(_ bool) {} +func (ro *IndexBackedBlockstore) Put(context.Context, blocks.Block) error { return errors.New("unsupported operation Put") } -func (ro *AllShardsReadBlockstore) PutMany(context.Context, []blocks.Block) error { +func (ro *IndexBackedBlockstore) PutMany(context.Context, []blocks.Block) error { return errors.New("unsupported operation PutMany") } -func (ro *AllShardsReadBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { +func (ro *IndexBackedBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { return nil, errors.New("unsupported operation AllKeysChan") } diff --git a/readonly_bs_test.go b/indexbs/indexbacked_bs_test.go similarity index 66% rename from readonly_bs_test.go rename to indexbs/indexbacked_bs_test.go index 69a78e3..6718c47 100644 --- a/readonly_bs_test.go +++ b/indexbs/indexbacked_bs_test.go @@ -1,10 +1,14 @@ -package dagstore +package indexbs import ( "context" "errors" "testing" + "github.com/filecoin-project/dagstore" + "github.com/filecoin-project/dagstore/mount" + "github.com/filecoin-project/dagstore/testdata" + "github.com/multiformats/go-multihash" "github.com/filecoin-project/dagstore/shard" @@ -18,10 +22,12 @@ var noOpSelector = func(c cid.Cid, shards []shard.Key) (shard.Key, error) { return shards[0], nil } +var carv2mnt = &mount.FSMount{FS: testdata.FS, Path: testdata.FSPathCarV2} + func TestReadOnlyBs(t *testing.T) { ctx := context.Background() store := dssync.MutexWrap(datastore.NewMapDatastore()) - dagst, err := NewDAGStore(Config{ + dagst, err := dagstore.NewDAGStore(dagstore.Config{ MountRegistry: testRegistry(t), TransientsDir: t.TempDir(), Datastore: store, @@ -31,15 +37,20 @@ func TestReadOnlyBs(t *testing.T) { err = dagst.Start(context.Background()) require.NoError(t, err) - // two shards containing the same cid - keys := registerShards(t, dagst, 2, carv2mnt, RegisterOpts{}) + // register a shard + ch := make(chan dagstore.ShardResult, 1) + sk := shard.KeyFromString("test1") + err = dagst.RegisterShard(context.Background(), sk, carv2mnt, ch, dagstore.RegisterOpts{}) + require.NoError(t, err) + res := <-ch + require.NoError(t, res.Error) - rbs, err := dagst.AllShardsReadBlockstore(noOpSelector, 10, 10) + rbs, err := NewIndexBackedBlockstore(dagst, noOpSelector, 10, 10) require.NoError(t, err) // iterate over the CARV2 Index for the given CARv2 file and ensure the readonly blockstore // works for each of those cids - it, err := dagst.GetIterableIndex(keys[0]) + it, err := dagst.GetIterableIndex(sk) require.NoError(t, err) it.ForEach(func(mh multihash.Multihash, u uint64) error { @@ -67,7 +78,7 @@ func TestReadOnlyBs(t *testing.T) { return shard.Key{}, errors.New("rejected") } - rbs, err = dagst.AllShardsReadBlockstore(fss, 10, 10) + rbs, err = NewIndexBackedBlockstore(dagst, fss, 10, 10) require.NoError(t, err) it.ForEach(func(mh multihash.Multihash, u uint64) error { c := cid.NewCidV1(cid.Raw, mh) @@ -86,5 +97,13 @@ func TestReadOnlyBs(t *testing.T) { return nil }) +} +func testRegistry(t *testing.T) *mount.Registry { + r := mount.NewRegistry() + err := r.Register("fs", &mount.FSMount{FS: testdata.FS}) + require.NoError(t, err) + err = r.Register("counting", new(mount.Counting)) + require.NoError(t, err) + return r } diff --git a/interface.go b/interface.go index 6df9ac2..b5ab602 100644 --- a/interface.go +++ b/interface.go @@ -3,8 +3,6 @@ package dagstore import ( "context" - blockstore "github.com/ipfs/go-ipfs-blockstore" - carindex "github.com/ipld/go-car/v2/index" mh "github.com/multiformats/go-multihash" @@ -26,5 +24,4 @@ type Interface interface { ShardsContainingMultihash(ctx context.Context, h mh.Multihash) ([]shard.Key, error) GC(ctx context.Context) (*GCResult, error) Close() error - AllShardsReadBlockstore(shardSelector ShardSelectorF, maxBSCachesize int, maxBlkCachesize int) (blockstore.Blockstore, error) } From 8387a9e64edee5cea6d61e1738694b5f3ab2b0cb Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Tue, 29 Mar 2022 17:47:24 +0530 Subject: [PATCH 09/18] thread safe --- indexbs/indexbacked_bs.go | 82 +++++++++++++++++++++++++--------- indexbs/indexbacked_bs_test.go | 58 +++++++++++++++++------- 2 files changed, 103 insertions(+), 37 deletions(-) diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index 5a1e551..cca7dfb 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "sync" "github.com/filecoin-project/dagstore" blocks "github.com/ipfs/go-block-format" @@ -21,7 +22,11 @@ var ErrBlockNotFound = errors.New("block not found") var _ blockstore.Blockstore = (*IndexBackedBlockstore)(nil) +// ErrNoShardSelected means that the shard selection function rejected all of the given shards. +var ErrNoShardSelected = errors.New("no shard selected") + // ShardSelectorF helps select a shard to fetch a cid from if the given cid is present in multiple shards. +// It should return `ErrNoShardSelected` if none of the given shard is selected. type ShardSelectorF func(c cid.Cid, shards []shard.Key) (shard.Key, error) type accessorWithBlockstore struct { @@ -34,9 +39,8 @@ type IndexBackedBlockstore struct { d *dagstore.DAGStore shardSelectF ShardSelectorF - // caches the blockstore for a given shard for shard read affinity i.e. further reads will likely be from the same shard. - // shard key -> read only blockstore - blockstoreCache *lru.Cache + bsStripedLocks [256]sync.Mutex + blockstoreCache *lru.Cache // caches the blockstore for a given shard for shard read affinity i.e. further reads will likely be from the same shard. Maps (shard key -> blockstore). } func NewIndexBackedBlockstore(d *dagstore.DAGStore, shardSelector ShardSelectorF, maxCacheSize int, maxBlocks int) (blockstore.Blockstore, error) { @@ -78,34 +82,41 @@ func (ro *IndexBackedBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.B // do we have a cached blockstore for a shard containing the required block ? If yes, serve the block from that blockstore for _, sk := range shards { - // a valid cache hit here updates the priority of the shard's blockstore in the LRU cache. - val, ok := ro.blockstoreCache.Get(sk) - if !ok { - continue - } + lk := &ro.bsStripedLocks[shardKeyToStriped(sk)] + lk.Lock() - rbs := val.(*accessorWithBlockstore).bs - blk, err := rbs.Get(ctx, c) - if err != nil { - // we know that the cid we want to lookup belongs to a shard with key `sk` and - // so if we fail to get the corresponding block from the blockstore for that shards, something has gone wrong - // and we should remove the blockstore for that shard from our cache. - ro.blockstoreCache.Remove(sk) - continue + blk, err := ro.readFromBSCacheUnlocked(ctx, c, sk) + if err == nil && blk != nil { + logbs.Debugw("Get: returning from block store cache", "cid", c) + + lk.Unlock() + return blk, nil } - // add the block to the block cache - logbs.Debugw("Get: returning from block store cache", "cid", c) - return blk, nil + lk.Unlock() } // ---- we don't have a cached blockstore for a shard that can serve the block -> let's build one. // select a valid shard that can serve the retrieval sk, err := ro.shardSelectF(c, shards) - if err != nil { + if err != nil && err == ErrNoShardSelected { return nil, ErrBlockNotFound } + if err != nil { + return nil, fmt.Errorf("failed to run shard selection function: %w", err) + } + + lk := &ro.bsStripedLocks[shardKeyToStriped(sk)] + lk.Lock() + defer lk.Unlock() + + // see if we have blockstore in the cache we can serve the retrieval from as the previous code in this critical section + // could have added a blockstore to the cache for the given shard key. + blk, err := ro.readFromBSCacheUnlocked(ctx, c, sk) + if err == nil && blk != nil { + return blk, nil + } // load blockstore for the selected shard and try to serve the cid from that blockstore. resch := make(chan dagstore.ShardResult, 1) @@ -128,7 +139,7 @@ func (ro *IndexBackedBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.B return nil, fmt.Errorf("failed to load read only blockstore for shard %s: %w", sk, err) } - blk, err := bs.Get(ctx, c) + blk, err = bs.Get(ctx, c) if err != nil { return nil, fmt.Errorf("failed to get block: %w", err) } @@ -156,9 +167,13 @@ func (ro *IndexBackedBlockstore) Has(ctx context.Context, c cid.Cid) (bool, erro } _, err = ro.shardSelectF(c, shards) + if err != nil && err == ErrNoShardSelected { + logbs.Debugw("Has error", "cid", c, "err", err) + return false, nil + } if err != nil { logbs.Debugw("Has error", "cid", c, "err", err) - return false, ErrBlockNotFound + return false, fmt.Errorf("failed to run shard selection function: %w", err) } logbs.Debugw("Has: returning true", "cid", c) @@ -178,6 +193,29 @@ func (ro *IndexBackedBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, e return len(blk.RawData()), nil } +func (ro *IndexBackedBlockstore) readFromBSCacheUnlocked(ctx context.Context, c cid.Cid, sk shard.Key) (blocks.Block, error) { + val, ok := ro.blockstoreCache.Get(sk) + if !ok { + return nil, ErrBlockNotFound + } + + rbs := val.(*accessorWithBlockstore).bs + blk, err := rbs.Get(ctx, c) + if err != nil { + // we know that the cid we want to lookup belongs to a shard with key `sk` and + // so if we fail to get the corresponding block from the blockstore for that shards, something has gone wrong + // and we should remove the blockstore for that shard from our cache. + ro.blockstoreCache.Remove(sk) + return nil, err + } + + return blk, nil +} + +func shardKeyToStriped(sk shard.Key) byte { + return sk.String()[len(sk.String())-1] +} + // --- UNSUPPORTED BLOCKSTORE METHODS ------- func (ro *IndexBackedBlockstore) DeleteBlock(context.Context, cid.Cid) error { return errors.New("unsupported operation DeleteBlock") diff --git a/indexbs/indexbacked_bs_test.go b/indexbs/indexbacked_bs_test.go index 6718c47..6c640b5 100644 --- a/indexbs/indexbacked_bs_test.go +++ b/indexbs/indexbacked_bs_test.go @@ -5,6 +5,8 @@ import ( "errors" "testing" + "golang.org/x/sync/errgroup" + "github.com/filecoin-project/dagstore" "github.com/filecoin-project/dagstore/mount" "github.com/filecoin-project/dagstore/testdata" @@ -53,25 +55,51 @@ func TestReadOnlyBs(t *testing.T) { it, err := dagst.GetIterableIndex(sk) require.NoError(t, err) - it.ForEach(func(mh multihash.Multihash, u uint64) error { - c := cid.NewCidV1(cid.Raw, mh) - - has, err := rbs.Has(ctx, c) - require.NoError(t, err) - require.True(t, has) - - blk, err := rbs.Get(ctx, c) - require.NoError(t, err) - require.NotEmpty(t, blk) + var errg errgroup.Group + + it.ForEach(func(mh multihash.Multihash, _ uint64) error { + + mhs := mh + errg.Go(func() error { + c := cid.NewCidV1(cid.Raw, mhs) + + // Has + has, err := rbs.Has(ctx, c) + if err != nil { + return err + } + if !has { + return errors.New("has should be true") + } + + // Get + blk, err := rbs.Get(ctx, c) + if err != nil { + return err + } + if blk == nil { + return errors.New("block should not be empty") + } + + // GetSize + _, err = rbs.GetSize(ctx, c) + if err != nil { + return err + } + + // ensure cids match + if blk.Cid() != c { + return errors.New("cid mismatch") + } + return nil + + }) - sz, err := rbs.GetSize(ctx, c) - require.NoError(t, err) - require.EqualValues(t, len(blk.RawData()), sz) - - require.EqualValues(t, c, blk.Cid()) return nil }) + require.NoError(t, errg.Wait()) + // ------------------------------------------ // Now test with a shard selector that rejects everything and ensure we always see errors fss := func(c cid.Cid, shards []shard.Key) (shard.Key, error) { From 7bd999d2098670abf8a5992750ef46d9d95d31fa Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Tue, 29 Mar 2022 19:53:27 +0530 Subject: [PATCH 10/18] better docs --- indexbs/indexbacked_bs.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index cca7dfb..a21003e 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -193,8 +193,9 @@ func (ro *IndexBackedBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, e return len(blk.RawData()), nil } -func (ro *IndexBackedBlockstore) readFromBSCacheUnlocked(ctx context.Context, c cid.Cid, sk shard.Key) (blocks.Block, error) { - val, ok := ro.blockstoreCache.Get(sk) +func (ro *IndexBackedBlockstore) readFromBSCacheUnlocked(ctx context.Context, c cid.Cid, shardContainingCid shard.Key) (blocks.Block, error) { + // We've already ensured that the given shard has the cid/multihash we are looking for. + val, ok := ro.blockstoreCache.Get(shardContainingCid) if !ok { return nil, ErrBlockNotFound } @@ -205,7 +206,7 @@ func (ro *IndexBackedBlockstore) readFromBSCacheUnlocked(ctx context.Context, c // we know that the cid we want to lookup belongs to a shard with key `sk` and // so if we fail to get the corresponding block from the blockstore for that shards, something has gone wrong // and we should remove the blockstore for that shard from our cache. - ro.blockstoreCache.Remove(sk) + ro.blockstoreCache.Remove(shardContainingCid) return nil, err } From 51396854de25cc22d723ddcb83a33048450b7c89 Mon Sep 17 00:00:00 2001 From: Aarsh Shah Date: Wed, 30 Mar 2022 14:41:51 +0530 Subject: [PATCH 11/18] remove redundant param --- indexbs/indexbacked_bs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index a21003e..01da7e2 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -43,7 +43,7 @@ type IndexBackedBlockstore struct { blockstoreCache *lru.Cache // caches the blockstore for a given shard for shard read affinity i.e. further reads will likely be from the same shard. Maps (shard key -> blockstore). } -func NewIndexBackedBlockstore(d *dagstore.DAGStore, shardSelector ShardSelectorF, maxCacheSize int, maxBlocks int) (blockstore.Blockstore, error) { +func NewIndexBackedBlockstore(d *dagstore.DAGStore, shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { // instantiate the blockstore cache bslru, err := lru.NewWithEvict(maxCacheSize, func(_ interface{}, val interface{}) { // ensure we close the blockstore for a shard when it's evicted from the cache so dagstore can gc it. From 8027d20e9980be65ee4cea64273cb0ba56a790b3 Mon Sep 17 00:00:00 2001 From: hannahhoward Date: Thu, 18 Aug 2022 01:31:08 -0700 Subject: [PATCH 12/18] chore(deps): upgrade deps --- go.mod | 10 +++--- go.sum | 58 +++++++++++++++++++++++++++++----- indexbs/indexbacked_bs_test.go | 4 +-- 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/go.mod b/go.mod index 0a4aefd..9960473 100644 --- a/go.mod +++ b/go.mod @@ -5,18 +5,20 @@ go 1.16 require ( github.com/hashicorp/golang-lru v0.5.4 github.com/ipfs/go-block-format v0.0.3 + github.com/ipfs/go-blockservice v0.4.0 // indirect github.com/ipfs/go-cid v0.1.0 github.com/ipfs/go-datastore v0.5.0 github.com/ipfs/go-ds-leveldb v0.5.0 - github.com/ipfs/go-ipfs-blockstore v1.1.2 + github.com/ipfs/go-ipfs-blockstore v1.2.0 github.com/ipfs/go-ipfs-blocksutil v0.0.1 github.com/ipfs/go-log/v2 v2.3.0 - github.com/ipld/go-car/v2 v2.1.1 + github.com/ipfs/go-merkledag v0.6.0 // indirect + github.com/ipld/go-car/v2 v2.4.1 github.com/libp2p/go-libp2p-core v0.9.0 // indirect github.com/mr-tron/base58 v1.2.0 - github.com/multiformats/go-multicodec v0.3.1-0.20210902112759-1539a079fd61 + github.com/multiformats/go-multicodec v0.5.0 github.com/multiformats/go-multihash v0.1.0 - github.com/stretchr/testify v1.7.0 + github.com/stretchr/testify v1.7.1 github.com/syndtr/goleveldb v1.0.0 github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 golang.org/x/exp v0.0.0-20210714144626-1041f73d31d8 diff --git a/go.sum b/go.sum index 4a7f276..6ba373a 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,7 @@ github.com/Kubuxu/go-os-helper v0.0.1/go.mod h1:N8B+I7vPCT80IcP58r50u4+gEEcsZETF github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/Stebalien/go-bitfield v0.0.1/go.mod h1:GNjFpasyUVkHMsfEOk8EFLJ9syQ6SI+XWrX9Wf2XH0s= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= @@ -26,6 +27,7 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= @@ -122,6 +124,7 @@ github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2 github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= +github.com/frankban/quicktest v1.14.2/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= @@ -137,6 +140,11 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= @@ -183,8 +191,9 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= +github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -248,13 +257,18 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/go-bitswap v0.5.1 h1:721YAEDBnLIrvcIMkCHCdqp34hA8jwL9yKMkyJpSpco= +github.com/ipfs/go-bitfield v1.0.0/go.mod h1:N/UiujQy+K+ceU1EF5EkVd1TNqevLrCQMIcAEPrdtus= github.com/ipfs/go-bitswap v0.5.1/go.mod h1:P+ckC87ri1xFLvk74NlXdP0Kj9RmWAh4+H78sC6Qopo= +github.com/ipfs/go-bitswap v0.6.0/go.mod h1:Hj3ZXdOC5wBJvENtdqsixmzzRukqd8EHLxZLZc3mzRA= +github.com/ipfs/go-bitswap v0.8.0 h1:UEV7kogQu2iGggkE9GhLykDrRCUpsNnpu2NODww/srw= +github.com/ipfs/go-bitswap v0.8.0/go.mod h1:/h8sBij8UVEaNWl8ABzpLRA5Y1cttdNUnpeGo2AA/LQ= github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY= github.com/ipfs/go-block-format v0.0.3 h1:r8t66QstRp/pd/or4dpnbVfXT5Gt7lOqRvC+/dDTpMc= github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk= -github.com/ipfs/go-blockservice v0.2.1 h1:NJ4j/cwEfIg60rzAWcCIxRtOwbf6ZPK49MewNxObCPQ= github.com/ipfs/go-blockservice v0.2.1/go.mod h1:k6SiwmgyYgs4M/qt+ww6amPeUH9EISLRBnvUurKJhi8= +github.com/ipfs/go-blockservice v0.3.0/go.mod h1:P5ppi8IHDC7O+pA0AlGTF09jruB2h+oP3wVVaZl8sfk= +github.com/ipfs/go-blockservice v0.4.0 h1:7MUijAW5SqdsqEW/EhnNFRJXVF8mGU5aGhZ3CQaCWbY= +github.com/ipfs/go-blockservice v0.4.0/go.mod h1:kRjO3wlGW9mS1aKuiCeGhx9K1DagQ10ACpVO59qgAx4= github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.2/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.3/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= @@ -284,20 +298,27 @@ github.com/ipfs/go-ds-leveldb v0.4.2/go.mod h1:jpbku/YqBSsBc1qgME8BkWS4AxzF2cEu1 github.com/ipfs/go-ds-leveldb v0.5.0 h1:s++MEBbD3ZKc9/8/njrn4flZLnCuY9I79v94gBUNumo= github.com/ipfs/go-ds-leveldb v0.5.0/go.mod h1:d3XG9RUDzQ6V4SHi8+Xgj9j1XuEk1z82lquxrVbml/Q= github.com/ipfs/go-ipfs-blockstore v0.2.1/go.mod h1:jGesd8EtCM3/zPgx+qr0/feTXGUeRai6adgwC+Q+JvE= -github.com/ipfs/go-ipfs-blockstore v1.1.2 h1:WCXoZcMYnvOTmlpX+RSSnhVN0uCmbWTeepTGX5lgiXw= github.com/ipfs/go-ipfs-blockstore v1.1.2/go.mod h1:w51tNR9y5+QXB0wkNcHt4O2aSZjTdqaEWaQdSxEyUOY= +github.com/ipfs/go-ipfs-blockstore v1.2.0 h1:n3WTeJ4LdICWs/0VSfjHrlqpPpl6MZ+ySd3j8qz0ykw= +github.com/ipfs/go-ipfs-blockstore v1.2.0/go.mod h1:eh8eTFLiINYNSNawfZOC7HOxNTxpB1PFuA5E1m/7exE= github.com/ipfs/go-ipfs-blocksutil v0.0.1 h1:Eh/H4pc1hsvhzsQoMEP3Bke/aW5P5rVM1IWFJMcGIPQ= github.com/ipfs/go-ipfs-blocksutil v0.0.1/go.mod h1:Yq4M86uIOmxmGPUHv/uI7uKqZNtLb449gwKqXjIsnRk= +github.com/ipfs/go-ipfs-chunker v0.0.1/go.mod h1:tWewYK0we3+rMbOh7pPFGDyypCtvGcBFymgY4rSDLAw= github.com/ipfs/go-ipfs-delay v0.0.0-20181109222059-70721b86a9a8/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-delay v0.0.1 h1:r/UXYyRcddO6thwOnhiznIAiSvxMECGgtv35Xs1IeRQ= github.com/ipfs/go-ipfs-delay v0.0.1/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-ds-help v0.1.1/go.mod h1:SbBafGJuGsPI/QL3j9Fc5YPLeAu+SzOkI0gFwAg+mOs= github.com/ipfs/go-ipfs-ds-help v1.1.0 h1:yLE2w9RAsl31LtfMt91tRZcrx+e61O5mDxFRR994w4Q= github.com/ipfs/go-ipfs-ds-help v1.1.0/go.mod h1:YR5+6EaebOhfcqVCyqemItCLthrpVNot+rsOU/5IatU= -github.com/ipfs/go-ipfs-exchange-interface v0.1.0 h1:TiMekCrOGQuWYtZO3mf4YJXDIdNgnKWZ9IE3fGlnWfo= github.com/ipfs/go-ipfs-exchange-interface v0.1.0/go.mod h1:ych7WPlyHqFvCi/uQI48zLZuAWVP5iTQPXEfVaw5WEI= -github.com/ipfs/go-ipfs-exchange-offline v0.1.1 h1:mEiXWdbMN6C7vtDG21Fphx8TGCbZPpQnz/496w/PL4g= +github.com/ipfs/go-ipfs-exchange-interface v0.2.0 h1:8lMSJmKogZYNo2jjhUs0izT+dck05pqUw4mWNW9Pw6Y= +github.com/ipfs/go-ipfs-exchange-interface v0.2.0/go.mod h1:z6+RhJuDQbqKguVyslSOuVDhqF9JtTrO3eptSAiW2/Y= github.com/ipfs/go-ipfs-exchange-offline v0.1.1/go.mod h1:vTiBRIbzSwDD0OWm+i3xeT0mO7jG2cbJYatp3HPk5XY= +github.com/ipfs/go-ipfs-exchange-offline v0.2.0/go.mod h1:HjwBeW0dvZvfOMwDP0TSKXIHf2s+ksdP4E3MLDRtLKY= +github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uYokgWRFidfvEkuA= +github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s= +github.com/ipfs/go-ipfs-files v0.0.3/go.mod h1:INEFm0LL2LWXBhNJ2PMIIb2w45hpXgPjNoE7yA8Y1d4= +github.com/ipfs/go-ipfs-posinfo v0.0.1/go.mod h1:SwyeVP+jCwiDu0C313l/8jg6ZxM0qqtlt2a0vILTc1A= github.com/ipfs/go-ipfs-pq v0.0.2 h1:e1vOOW6MuOwG2lqxcLA+wEn93i/9laCY8sXAw76jFOY= github.com/ipfs/go-ipfs-pq v0.0.2/go.mod h1:LWIqQpqfRG3fNc5XsnIhz/wQ2XXGyugQwls7BgUmUfY= github.com/ipfs/go-ipfs-routing v0.2.1 h1:E+whHWhJkdN9YeoHZNj5itzc+OR292AJ2uE9FFiW0BY= @@ -308,8 +329,9 @@ github.com/ipfs/go-ipfs-util v0.0.2/go.mod h1:CbPtkWJzjLdEcezDns2XYaehFVNXG9zrdr github.com/ipfs/go-ipld-cbor v0.0.5 h1:ovz4CHKogtG2KB/h1zUp5U0c/IzZrL435rCh5+K/5G8= github.com/ipfs/go-ipld-cbor v0.0.5/go.mod h1:BkCduEx3XBCO6t2Sfo5BaHzuok7hbhdMm9Oh8B2Ftq4= github.com/ipfs/go-ipld-format v0.0.1/go.mod h1:kyJtbkDALmFHv3QR6et67i35QzO3S0dCDnkOJhcZkms= -github.com/ipfs/go-ipld-format v0.2.0 h1:xGlJKkArkmBvowr+GMCX0FEZtkro71K1AwiKnL37mwA= github.com/ipfs/go-ipld-format v0.2.0/go.mod h1:3l3C1uKoadTPbeNfrDi+xMInYKlx2Cvg1BuydPSdzQs= +github.com/ipfs/go-ipld-format v0.3.0 h1:Mwm2oRLzIuUwEPewWAWyMuuBQUsn3awfFEYVb8akMOQ= +github.com/ipfs/go-ipld-format v0.3.0/go.mod h1:co/SdBE8h99968X0hViiw1MNlh6fvxxnHpvVLnH7jSM= github.com/ipfs/go-ipld-legacy v0.1.0 h1:wxkkc4k8cnvIGIjPO0waJCe7SHEyFgl+yQdafdjGrpA= github.com/ipfs/go-ipld-legacy v0.1.0/go.mod h1:86f5P/srAmh9GcIcWQR9lfFLZPrIyyXQeVlOWeeWEuI= github.com/ipfs/go-log v0.0.1/go.mod h1:kL1d2/hzSpI0thNYjiKfjanbVNU+IIGA/WnNESY9leM= @@ -327,20 +349,30 @@ github.com/ipfs/go-log/v2 v2.3.0 h1:31Re/cPqFHpsRHgyVwjWADPoF0otB1WrjTy8ZFYwEZU= github.com/ipfs/go-log/v2 v2.3.0/go.mod h1:QqGoj30OTpnKaG/LKTGTxoP2mmQtjVMEnK72gynbe/g= github.com/ipfs/go-merkledag v0.5.1 h1:tr17GPP5XtPhvPPiWtu20tSGZiZDuTaJRXBLcr79Umk= github.com/ipfs/go-merkledag v0.5.1/go.mod h1:cLMZXx8J08idkp5+id62iVftUQV+HlYJ3PIhDfZsjA4= +github.com/ipfs/go-merkledag v0.6.0 h1:oV5WT2321tS4YQVOPgIrWHvJ0lJobRTerU+i9nmUCuA= +github.com/ipfs/go-merkledag v0.6.0/go.mod h1:9HSEwRd5sV+lbykiYP+2NC/3o6MZbKNaa4hfNcH5iH0= github.com/ipfs/go-metrics-interface v0.0.1 h1:j+cpbjYvu4R8zbleSs36gvB7jR+wsL2fGD6n0jO4kdg= github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j/b/tL7HTWtJ4VPgWY= github.com/ipfs/go-peertaskqueue v0.7.0 h1:VyO6G4sbzX80K58N60cCaHsSsypbUNs1GjO5seGNsQ0= github.com/ipfs/go-peertaskqueue v0.7.0/go.mod h1:M/akTIE/z1jGNXMU7kFB4TeSEFvj68ow0Rrb04donIU= +github.com/ipfs/go-unixfs v0.3.1/go.mod h1:h4qfQYzghiIc8ZNFKiLMFWOTzrWIAtzYQ59W/pCFf1o= +github.com/ipfs/go-unixfsnode v1.4.0/go.mod h1:qc7YFFZ8tABc58p62HnIYbUMwj9chhUuFWmxSokfePo= github.com/ipfs/go-verifcid v0.0.1 h1:m2HI7zIuR5TFyQ1b79Da5N9dnnCP1vcu2QqawmWlK2E= github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZc0g37pY0= github.com/ipld/go-car/v2 v2.1.1 h1:saaKz4nC0AdfCGHLYKeXLGn8ivoPC54fyS55uyOLKwA= github.com/ipld/go-car/v2 v2.1.1/go.mod h1:+2Yvf0Z3wzkv7NeI69i8tuZ+ft7jyjPYIWZzeVNeFcI= +github.com/ipld/go-car/v2 v2.4.1 h1:9S+FYbQzQJ/XzsdiOV13W5Iu/i+gUnr6csbSD9laFEg= +github.com/ipld/go-car/v2 v2.4.1/go.mod h1:zjpRf0Jew9gHqSvjsKVyoq9OY9SWoEKdYCQUKVaaPT0= github.com/ipld/go-codec-dagpb v1.3.0 h1:czTcaoAuNNyIYWs6Qe01DJ+sEX7B+1Z0LcXjSatMGe8= github.com/ipld/go-codec-dagpb v1.3.0/go.mod h1:ga4JTU3abYApDC3pZ00BC2RSvC3qfBb9MSJkMLSwnhA= +github.com/ipld/go-codec-dagpb v1.3.1 h1:yVNlWRQexCa54ln3MSIiUN++ItH7pdhBFhh0hSgZu1w= +github.com/ipld/go-codec-dagpb v1.3.1/go.mod h1:ErNNglIi5KMur/MfFE/svtgQthzVvf+43MrzLbpcIZY= github.com/ipld/go-ipld-prime v0.9.1-0.20210324083106-dc342a9917db/go.mod h1:KvBLMr4PX1gWptgkzRjVZCrLmSGcZCb/jioOQwCqZN8= github.com/ipld/go-ipld-prime v0.11.0/go.mod h1:+WIAkokurHmZ/KwzDOMUuoeJgaRQktHtEaLglS3ZeV8= github.com/ipld/go-ipld-prime v0.14.0 h1:2FnBqUjmmgxgZD6/zB3eygWlmIsHNGrZ57L99x3xD6Q= github.com/ipld/go-ipld-prime v0.14.0/go.mod h1:9ASQLwUFLptCov6lIYc70GRB4V7UTyLD0IJtrDJe6ZM= +github.com/ipld/go-ipld-prime v0.16.0 h1:RS5hhjB/mcpeEPJvfyj0qbOj/QL+/j05heZ0qa97dVo= +github.com/ipld/go-ipld-prime v0.16.0/go.mod h1:axSCuOCBPqrH+gvXr2w9uAOulJqBPhHPT2PjoiiU1qA= github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20211210234204-ce2a1c70cd73 h1:TsyATB2ZRRQGTwafJdgEUQkmjOExRV0DNokcihZxbnQ= github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20211210234204-ce2a1c70cd73/go.mod h1:2PJ0JgxyB08t0b2WKrcuqI3di0V+5n6RS/LTUJhkoxY= github.com/jackpal/gateway v1.0.5/go.mod h1:lTpwd4ACLXmpyiCTRtfiNyVnUmqT9RivzCDQetPfnjA= @@ -666,6 +698,9 @@ github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPw github.com/multiformats/go-multicodec v0.3.0/go.mod h1:qGGaQmioCDh+TeFOnxrbU0DaIPw8yFgAZgFG0V7p1qQ= github.com/multiformats/go-multicodec v0.3.1-0.20210902112759-1539a079fd61 h1:ZrUuMKNgJ52qHPoQ+bx0h0uBfcWmN7Px+4uKSZeesiI= github.com/multiformats/go-multicodec v0.3.1-0.20210902112759-1539a079fd61/go.mod h1:1Hj/eHRaVWSXiSNNfcEPcwZleTmdNP81xlxDLnWU9GQ= +github.com/multiformats/go-multicodec v0.3.1-0.20211210143421-a526f306ed2c/go.mod h1:1Hj/eHRaVWSXiSNNfcEPcwZleTmdNP81xlxDLnWU9GQ= +github.com/multiformats/go-multicodec v0.5.0 h1:EgU6cBe/D7WRwQb1KmnBvU7lrcFGMggZVTPtOW9dDHs= +github.com/multiformats/go-multicodec v0.5.0/go.mod h1:DiY2HFaEp5EhEXb/iYzVAunmyX/aSFMxq2KMKfWEues= github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U= github.com/multiformats/go-multihash v0.0.5/go.mod h1:lt/HCbqlQwlPBz7lv0sQCdtfcMtlJvakRUn/0Ual8po= github.com/multiformats/go-multihash v0.0.8/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= @@ -849,8 +884,9 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= @@ -869,6 +905,7 @@ github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 h1:5HZfQkwe0mIf github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11/go.mod h1:Wlo/SzPmxVp6vXpGt/zaXhHH0fn4IxgqZc82aKg6bpQ= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 h1:WXhVOwj2USAXB5oMDwRl3piOux2XMV9TANaYxXHdkoE= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158/go.mod h1:Xj/M2wWU+QdTdRbu/L/1dIZY8/Wb2K9pAhtroQuxJJI= +github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8= github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1/go.mod h1:8UvriyWtv5Q5EOgjHaSseUEdkQfvwFv1I/In/O2M9gc= github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc/go.mod h1:bopw91TMyo8J3tvftk8xmU2kPmlrt4nScJQZU2hE5EM= github.com/whyrusleeping/go-logging v0.0.1/go.mod h1:lDPYj54zutzG1XYfHAhcc7oNXEburHQBn+Iqd4yS4vE= @@ -892,10 +929,14 @@ go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= +go.opentelemetry.io/otel v1.7.0 h1:Z2lA3Tdch0iDcrhJXDIlC94XE+bxok1F9B+4Lz/lGsM= +go.opentelemetry.io/otel v1.7.0/go.mod h1:5BdUoMIz5WEs0vt0CUEMtSSaTSHBBVwrhnz7+nrD5xk= go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa9FqQ2IQ8LoxiGnw= go.opentelemetry.io/otel/sdk v0.20.0/go.mod h1:g/IcepuwNsoiX5Byy2nNV0ySUF1em498m7hBWC279Yc= go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw= +go.opentelemetry.io/otel/trace v1.7.0 h1:O37Iogk1lEkMRXewVtZ1BBTVn5JEp8GrJvP92bJqC6o= +go.opentelemetry.io/otel/trace v1.7.0/go.mod h1:fzLSB9nqR2eXzxPXb2JW9IKE+ScyXA48yyE4TNvoHqU= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= @@ -1035,6 +1076,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190228124157-a34e9553db1e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190302025703-b6889370fb10/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190316082340-a2f829d7f35f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190405154228-4b34438f7a67/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/indexbs/indexbacked_bs_test.go b/indexbs/indexbacked_bs_test.go index 6c640b5..ba06581 100644 --- a/indexbs/indexbacked_bs_test.go +++ b/indexbs/indexbacked_bs_test.go @@ -47,7 +47,7 @@ func TestReadOnlyBs(t *testing.T) { res := <-ch require.NoError(t, res.Error) - rbs, err := NewIndexBackedBlockstore(dagst, noOpSelector, 10, 10) + rbs, err := NewIndexBackedBlockstore(dagst, noOpSelector, 10) require.NoError(t, err) // iterate over the CARV2 Index for the given CARv2 file and ensure the readonly blockstore @@ -106,7 +106,7 @@ func TestReadOnlyBs(t *testing.T) { return shard.Key{}, errors.New("rejected") } - rbs, err = NewIndexBackedBlockstore(dagst, fss, 10, 10) + rbs, err = NewIndexBackedBlockstore(dagst, fss, 10) require.NoError(t, err) it.ForEach(func(mh multihash.Multihash, u uint64) error { c := cid.NewCidV1(cid.Raw, mh) From aba4e75dd27a09af89604eda314027cf84efba4e Mon Sep 17 00:00:00 2001 From: hannahhoward Date: Thu, 18 Aug 2022 01:37:58 -0700 Subject: [PATCH 13/18] refactor(indexbs): use dagstore.Interface direct pointer to DAG store is not always available, better mocking --- indexbs/indexbacked_bs.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index 01da7e2..d942405 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -36,14 +36,14 @@ type accessorWithBlockstore struct { // IndexBackedBlockstore is a read only blockstore over all cids across all shards in the dagstore. type IndexBackedBlockstore struct { - d *dagstore.DAGStore + d dagstore.Interface shardSelectF ShardSelectorF bsStripedLocks [256]sync.Mutex blockstoreCache *lru.Cache // caches the blockstore for a given shard for shard read affinity i.e. further reads will likely be from the same shard. Maps (shard key -> blockstore). } -func NewIndexBackedBlockstore(d *dagstore.DAGStore, shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { +func NewIndexBackedBlockstore(d dagstore.Interface, shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { // instantiate the blockstore cache bslru, err := lru.NewWithEvict(maxCacheSize, func(_ interface{}, val interface{}) { // ensure we close the blockstore for a shard when it's evicted from the cache so dagstore can gc it. From 5fd66219efa1173edd4bda03b440955d8f9d7169 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 6 Sep 2022 00:56:53 +0800 Subject: [PATCH 14/18] feat: support GetSize for index backed blockstore --- go.mod | 3 +- go.sum | 14 +- indexbs/indexbacked_bs.go | 245 ++++++++++++++++++++------------- indexbs/indexbacked_bs_test.go | 64 ++++++++- 4 files changed, 221 insertions(+), 105 deletions(-) diff --git a/go.mod b/go.mod index 9960473..de9b4f9 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/filecoin-project/dagstore go 1.16 require ( - github.com/hashicorp/golang-lru v0.5.4 + github.com/hnlq715/golang-lru v0.3.0 github.com/ipfs/go-block-format v0.0.3 github.com/ipfs/go-blockservice v0.4.0 // indirect github.com/ipfs/go-cid v0.1.0 @@ -12,7 +12,6 @@ require ( github.com/ipfs/go-ipfs-blockstore v1.2.0 github.com/ipfs/go-ipfs-blocksutil v0.0.1 github.com/ipfs/go-log/v2 v2.3.0 - github.com/ipfs/go-merkledag v0.6.0 // indirect github.com/ipld/go-car/v2 v2.4.1 github.com/libp2p/go-libp2p-core v0.9.0 // indirect github.com/mr-tron/base58 v1.2.0 diff --git a/go.sum b/go.sum index 6ba373a..faf7710 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,7 @@ github.com/Kubuxu/go-os-helper v0.0.1/go.mod h1:N8B+I7vPCT80IcP58r50u4+gEEcsZETF github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/Stebalien/go-bitfield v0.0.1 h1:X3kbSSPUaJK60wV2hjOPZwmpljr6VGCqdq4cBLhbQBo= github.com/Stebalien/go-bitfield v0.0.1/go.mod h1:GNjFpasyUVkHMsfEOk8EFLJ9syQ6SI+XWrX9Wf2XH0s= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= @@ -122,8 +123,8 @@ github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiD github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= +github.com/frankban/quicktest v1.14.2 h1:SPb1KFFmM+ybpEjPUhCCkZOM5xlovT5UbrMvWnXyBns= github.com/frankban/quicktest v1.14.2/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= @@ -248,6 +249,8 @@ github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= +github.com/hnlq715/golang-lru v0.3.0 h1:eJtRD3bIw/dxwha16+urdY7bGfoCy/fAM+A/gahvYJM= +github.com/hnlq715/golang-lru v0.3.0/go.mod h1:RBkgDAtlu0SgTPvpb4VW2/RQnkCBMRD3Lr6B9RhsAS8= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= github.com/huin/goupnp v1.0.0 h1:wg75sLpL6DZqwHQN6E1Cfk6mtfzS45z8OV+ic+DtHRo= @@ -257,6 +260,7 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= +github.com/ipfs/go-bitfield v1.0.0 h1:y/XHm2GEmD9wKngheWNNCNL0pzrWXZwCdQGv1ikXknQ= github.com/ipfs/go-bitfield v1.0.0/go.mod h1:N/UiujQy+K+ceU1EF5EkVd1TNqevLrCQMIcAEPrdtus= github.com/ipfs/go-bitswap v0.5.1/go.mod h1:P+ckC87ri1xFLvk74NlXdP0Kj9RmWAh4+H78sC6Qopo= github.com/ipfs/go-bitswap v0.6.0/go.mod h1:Hj3ZXdOC5wBJvENtdqsixmzzRukqd8EHLxZLZc3mzRA= @@ -303,6 +307,7 @@ github.com/ipfs/go-ipfs-blockstore v1.2.0 h1:n3WTeJ4LdICWs/0VSfjHrlqpPpl6MZ+ySd3 github.com/ipfs/go-ipfs-blockstore v1.2.0/go.mod h1:eh8eTFLiINYNSNawfZOC7HOxNTxpB1PFuA5E1m/7exE= github.com/ipfs/go-ipfs-blocksutil v0.0.1 h1:Eh/H4pc1hsvhzsQoMEP3Bke/aW5P5rVM1IWFJMcGIPQ= github.com/ipfs/go-ipfs-blocksutil v0.0.1/go.mod h1:Yq4M86uIOmxmGPUHv/uI7uKqZNtLb449gwKqXjIsnRk= +github.com/ipfs/go-ipfs-chunker v0.0.1 h1:cHUUxKFQ99pozdahi+uSC/3Y6HeRpi9oTeUHbE27SEw= github.com/ipfs/go-ipfs-chunker v0.0.1/go.mod h1:tWewYK0we3+rMbOh7pPFGDyypCtvGcBFymgY4rSDLAw= github.com/ipfs/go-ipfs-delay v0.0.0-20181109222059-70721b86a9a8/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-delay v0.0.1 h1:r/UXYyRcddO6thwOnhiznIAiSvxMECGgtv35Xs1IeRQ= @@ -347,7 +352,6 @@ github.com/ipfs/go-log/v2 v2.1.1/go.mod h1:2v2nsGfZsvvAJz13SyFzf9ObaqwHiHxsPLEHn github.com/ipfs/go-log/v2 v2.1.3/go.mod h1:/8d0SH3Su5Ooc31QlL1WysJhvyOTDCjcCZ9Axpmri6g= github.com/ipfs/go-log/v2 v2.3.0 h1:31Re/cPqFHpsRHgyVwjWADPoF0otB1WrjTy8ZFYwEZU= github.com/ipfs/go-log/v2 v2.3.0/go.mod h1:QqGoj30OTpnKaG/LKTGTxoP2mmQtjVMEnK72gynbe/g= -github.com/ipfs/go-merkledag v0.5.1 h1:tr17GPP5XtPhvPPiWtu20tSGZiZDuTaJRXBLcr79Umk= github.com/ipfs/go-merkledag v0.5.1/go.mod h1:cLMZXx8J08idkp5+id62iVftUQV+HlYJ3PIhDfZsjA4= github.com/ipfs/go-merkledag v0.6.0 h1:oV5WT2321tS4YQVOPgIrWHvJ0lJobRTerU+i9nmUCuA= github.com/ipfs/go-merkledag v0.6.0/go.mod h1:9HSEwRd5sV+lbykiYP+2NC/3o6MZbKNaa4hfNcH5iH0= @@ -356,20 +360,18 @@ github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j github.com/ipfs/go-peertaskqueue v0.7.0 h1:VyO6G4sbzX80K58N60cCaHsSsypbUNs1GjO5seGNsQ0= github.com/ipfs/go-peertaskqueue v0.7.0/go.mod h1:M/akTIE/z1jGNXMU7kFB4TeSEFvj68ow0Rrb04donIU= github.com/ipfs/go-unixfs v0.3.1/go.mod h1:h4qfQYzghiIc8ZNFKiLMFWOTzrWIAtzYQ59W/pCFf1o= +github.com/ipfs/go-unixfsnode v1.4.0 h1:9BUxHBXrbNi8mWHc6j+5C580WJqtVw9uoeEKn4tMhwA= github.com/ipfs/go-unixfsnode v1.4.0/go.mod h1:qc7YFFZ8tABc58p62HnIYbUMwj9chhUuFWmxSokfePo= github.com/ipfs/go-verifcid v0.0.1 h1:m2HI7zIuR5TFyQ1b79Da5N9dnnCP1vcu2QqawmWlK2E= github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZc0g37pY0= -github.com/ipld/go-car/v2 v2.1.1 h1:saaKz4nC0AdfCGHLYKeXLGn8ivoPC54fyS55uyOLKwA= github.com/ipld/go-car/v2 v2.1.1/go.mod h1:+2Yvf0Z3wzkv7NeI69i8tuZ+ft7jyjPYIWZzeVNeFcI= github.com/ipld/go-car/v2 v2.4.1 h1:9S+FYbQzQJ/XzsdiOV13W5Iu/i+gUnr6csbSD9laFEg= github.com/ipld/go-car/v2 v2.4.1/go.mod h1:zjpRf0Jew9gHqSvjsKVyoq9OY9SWoEKdYCQUKVaaPT0= -github.com/ipld/go-codec-dagpb v1.3.0 h1:czTcaoAuNNyIYWs6Qe01DJ+sEX7B+1Z0LcXjSatMGe8= github.com/ipld/go-codec-dagpb v1.3.0/go.mod h1:ga4JTU3abYApDC3pZ00BC2RSvC3qfBb9MSJkMLSwnhA= github.com/ipld/go-codec-dagpb v1.3.1 h1:yVNlWRQexCa54ln3MSIiUN++ItH7pdhBFhh0hSgZu1w= github.com/ipld/go-codec-dagpb v1.3.1/go.mod h1:ErNNglIi5KMur/MfFE/svtgQthzVvf+43MrzLbpcIZY= github.com/ipld/go-ipld-prime v0.9.1-0.20210324083106-dc342a9917db/go.mod h1:KvBLMr4PX1gWptgkzRjVZCrLmSGcZCb/jioOQwCqZN8= github.com/ipld/go-ipld-prime v0.11.0/go.mod h1:+WIAkokurHmZ/KwzDOMUuoeJgaRQktHtEaLglS3ZeV8= -github.com/ipld/go-ipld-prime v0.14.0 h1:2FnBqUjmmgxgZD6/zB3eygWlmIsHNGrZ57L99x3xD6Q= github.com/ipld/go-ipld-prime v0.14.0/go.mod h1:9ASQLwUFLptCov6lIYc70GRB4V7UTyLD0IJtrDJe6ZM= github.com/ipld/go-ipld-prime v0.16.0 h1:RS5hhjB/mcpeEPJvfyj0qbOj/QL+/j05heZ0qa97dVo= github.com/ipld/go-ipld-prime v0.16.0/go.mod h1:axSCuOCBPqrH+gvXr2w9uAOulJqBPhHPT2PjoiiU1qA= @@ -696,7 +698,6 @@ github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/g github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk= github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc= github.com/multiformats/go-multicodec v0.3.0/go.mod h1:qGGaQmioCDh+TeFOnxrbU0DaIPw8yFgAZgFG0V7p1qQ= -github.com/multiformats/go-multicodec v0.3.1-0.20210902112759-1539a079fd61 h1:ZrUuMKNgJ52qHPoQ+bx0h0uBfcWmN7Px+4uKSZeesiI= github.com/multiformats/go-multicodec v0.3.1-0.20210902112759-1539a079fd61/go.mod h1:1Hj/eHRaVWSXiSNNfcEPcwZleTmdNP81xlxDLnWU9GQ= github.com/multiformats/go-multicodec v0.3.1-0.20211210143421-a526f306ed2c/go.mod h1:1Hj/eHRaVWSXiSNNfcEPcwZleTmdNP81xlxDLnWU9GQ= github.com/multiformats/go-multicodec v0.5.0 h1:EgU6cBe/D7WRwQb1KmnBvU7lrcFGMggZVTPtOW9dDHs= @@ -905,6 +906,7 @@ github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 h1:5HZfQkwe0mIf github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11/go.mod h1:Wlo/SzPmxVp6vXpGt/zaXhHH0fn4IxgqZc82aKg6bpQ= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 h1:WXhVOwj2USAXB5oMDwRl3piOux2XMV9TANaYxXHdkoE= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158/go.mod h1:Xj/M2wWU+QdTdRbu/L/1dIZY8/Wb2K9pAhtroQuxJJI= +github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E= github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8= github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1/go.mod h1:8UvriyWtv5Q5EOgjHaSseUEdkQfvwFv1I/In/O2M9gc= github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc/go.mod h1:bopw91TMyo8J3tvftk8xmU2kPmlrt4nScJQZU2hE5EM= diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index d942405..98b6e9a 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -5,18 +5,19 @@ import ( "errors" "fmt" "sync" + "time" "github.com/filecoin-project/dagstore" - blocks "github.com/ipfs/go-block-format" - logging "github.com/ipfs/go-log/v2" - "github.com/filecoin-project/dagstore/shard" - lru "github.com/hashicorp/golang-lru" + lru "github.com/hnlq715/golang-lru" + blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" + "github.com/ipfs/go-datastore" blockstore "github.com/ipfs/go-ipfs-blockstore" + logging "github.com/ipfs/go-log/v2" ) -var logbs = logging.Logger("dagstore-all-readblockstore") +var logbs = logging.Logger("dagstore/idxbs") var ErrBlockNotFound = errors.New("block not found") @@ -39,8 +40,10 @@ type IndexBackedBlockstore struct { d dagstore.Interface shardSelectF ShardSelectorF - bsStripedLocks [256]sync.Mutex - blockstoreCache *lru.Cache // caches the blockstore for a given shard for shard read affinity i.e. further reads will likely be from the same shard. Maps (shard key -> blockstore). + bsStripedLocks [256]sync.Mutex + // caches the blockstore for a given shard for shard read affinity + // i.e. further reads will likely be from the same shard. Maps (shard key -> blockstore). + blockstoreCache *lru.Cache } func NewIndexBackedBlockstore(d dagstore.Interface, shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { @@ -61,162 +64,220 @@ func NewIndexBackedBlockstore(d dagstore.Interface, shardSelector ShardSelectorF }, nil } -func (ro *IndexBackedBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.Block, finalErr error) { - logbs.Debugw("Get called", "cid", c) - defer func() { - if finalErr != nil { - logbs.Debugw("Get: got error", "cid", c, "error", finalErr) - } - }() +type BlockstoreOp bool + +const ( + BlockstoreOpGet = true + BlockstoreOpGetSize = !BlockstoreOpGet +) + +func (o BlockstoreOp) String() string { + if o == BlockstoreOpGet { + return "Get" + } + return "GetSize" +} + +type opRes struct { + block blocks.Block + size int +} + +func (ro *IndexBackedBlockstore) Get(ctx context.Context, c cid.Cid) (blocks.Block, error) { + res, err := ro.execOpWithLogs(ctx, c, BlockstoreOpGet) + if err != nil { + return nil, err + } + return res.block, err +} + +func (ro *IndexBackedBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) { + res, err := ro.execOpWithLogs(ctx, c, BlockstoreOpGetSize) + if err != nil { + return 0, err + } + return res.size, err +} + +func (ro *IndexBackedBlockstore) execOpWithLogs(ctx context.Context, c cid.Cid, op BlockstoreOp) (*opRes, error) { + logbs.Debugw(op.String(), "cid", c) - mhash := c.Hash() + res, err := ro.execOp(ctx, c, op) + if err != nil { + logbs.Debugw(op.String()+" error", "cid", c, "error", err) + } else { + logbs.Debugw(op.String()+" success", "cid", c) + } + return res, err +} - // fetch all the shards containing the multihash - shards, err := ro.d.ShardsContainingMultihash(ctx, mhash) +func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op BlockstoreOp) (*opRes, error) { + // Fetch all the shards containing the multihash + shards, err := ro.d.ShardsContainingMultihash(ctx, c.Hash()) if err != nil { - return nil, fmt.Errorf("failed to fetch shards containing the block: %w", err) + if errors.Is(err, datastore.ErrNotFound) { + return nil, ErrBlockNotFound + } + return nil, fmt.Errorf("failed to fetch shards containing block %s (multihash %s): %w", c, c.Hash(), err) } if len(shards) == 0 { + // If there are no shards containing the multihash, return "not found" return nil, ErrBlockNotFound } - // do we have a cached blockstore for a shard containing the required block ? If yes, serve the block from that blockstore + // Do we have a cached blockstore for a shard containing the required block? + // If so, call op on the cached blockstore. for _, sk := range shards { + // Use a striped lock to synchronize between this code that gets from + // the cache and the code below that adds to the cache lk := &ro.bsStripedLocks[shardKeyToStriped(sk)] lk.Lock() - - blk, err := ro.readFromBSCacheUnlocked(ctx, c, sk) - if err == nil && blk != nil { - logbs.Debugw("Get: returning from block store cache", "cid", c) - - lk.Unlock() - return blk, nil - } - + res, err := ro.readFromBSCacheUnlocked(ctx, c, sk, op) lk.Unlock() + if err == nil { + // Found a cached shard blockstore containing the required block, + // and successfully called the blockstore op + return res, nil + } } - // ---- we don't have a cached blockstore for a shard that can serve the block -> let's build one. + // We don't have a cached blockstore for a shard that contains the block. + // Let's build one. - // select a valid shard that can serve the retrieval + // Use the shard select function to select one of the shards with the block sk, err := ro.shardSelectF(c, shards) - if err != nil && err == ErrNoShardSelected { + if err != nil && errors.Is(err, ErrNoShardSelected) { + // If none of the shards passes the selection filter, return "not found" return nil, ErrBlockNotFound } if err != nil { return nil, fmt.Errorf("failed to run shard selection function: %w", err) } + // Synchronize between the code above that gets a blockstore from the cache + // and the code below that adds a blockstore to the cache lk := &ro.bsStripedLocks[shardKeyToStriped(sk)] lk.Lock() defer lk.Unlock() - // see if we have blockstore in the cache we can serve the retrieval from as the previous code in this critical section - // could have added a blockstore to the cache for the given shard key. - blk, err := ro.readFromBSCacheUnlocked(ctx, c, sk) - if err == nil && blk != nil { - return blk, nil + // Check if another thread already added the shard's blockstore to the + // cache while this thread was waiting to obtain the lock + res, err := ro.readFromBSCacheUnlocked(ctx, c, sk, op) + if err == nil { + return res, nil } - // load blockstore for the selected shard and try to serve the cid from that blockstore. + // Load the blockstore for the selected shard resch := make(chan dagstore.ShardResult, 1) if err := ro.d.AcquireShard(ctx, sk, resch, dagstore.AcquireOpts{}); err != nil { return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) } - var res dagstore.ShardResult + var shres dagstore.ShardResult select { case <-ctx.Done(): return nil, ctx.Err() - case res = <-resch: - if res.Error != nil { - return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, res.Error) + case shres = <-resch: + if shres.Error != nil { + return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, shres.Error) } } - sa := res.Accessor + sa := shres.Accessor bs, err := sa.Blockstore() if err != nil { - return nil, fmt.Errorf("failed to load read only blockstore for shard %s: %w", sk, err) + return nil, fmt.Errorf("failed to load read-only blockstore for shard %s: %w", sk, err) } - blk, err = bs.Get(ctx, c) + // Call the operation on the blockstore + res, err = execOpOnBlockstore(ctx, c, sk, bs, op) if err != nil { - return nil, fmt.Errorf("failed to get block: %w", err) + return nil, err } - // update the block cache and the blockstore cache + // Update the blockstore cache ro.blockstoreCache.Add(sk, &accessorWithBlockstore{sa, bs}) - logbs.Debugw("Get: returning after creating new blockstore", "cid", c) - return blk, nil + logbs.Debugw("Added new blockstore to cache", "cid", c, "shard", sk) + return res, nil +} + +func (ro *IndexBackedBlockstore) readFromBSCacheUnlocked(ctx context.Context, c cid.Cid, shardContainingCid shard.Key, op BlockstoreOp) (*opRes, error) { + // Get the shard's blockstore from the cache + val, ok := ro.blockstoreCache.Get(shardContainingCid) + if !ok { + return nil, ErrBlockNotFound + } + + accessor := val.(*accessorWithBlockstore) + res, err := execOpOnBlockstore(ctx, c, shardContainingCid, accessor.bs, op) + if err == nil { + return res, nil + } + + // We know that the cid we want to lookup belongs to a shard with key `sk` and + // so if we fail to get the corresponding block from the blockstore for that shard, + // something has gone wrong and we should remove the blockstore for that shard from our cache. + // However there may be several calls from different threads waiting to acquire + // the blockstore from the cache, so to prevent flapping, set a short expiry on the + // cache key instead of removing it immediately. + logbs.Warnf("expected blockstore for shard %s to contain cid %s (multihash %s) but it did not", + shardContainingCid, c, c.Hash()) + ro.blockstoreCache.AddEx(shardContainingCid, accessor, time.Second) + return nil, err +} + +func execOpOnBlockstore(ctx context.Context, c cid.Cid, sk shard.Key, bs dagstore.ReadBlockstore, op BlockstoreOp) (*opRes, error) { + var err error + var res opRes + switch op { + case BlockstoreOpGet: + res.block, err = bs.Get(ctx, c) + case BlockstoreOpGetSize: + res.size, err = bs.GetSize(ctx, c) + } + if err != nil { + return nil, fmt.Errorf("failed to call blockstore.%s for shard %s: %w", op, sk, err) + } + return &res, nil +} + +func shardKeyToStriped(sk shard.Key) byte { + // The shard key is typically a cid, so the last byte should be random. + // Use the last byte as as the striped lock index. + return sk.String()[len(sk.String())-1] } func (ro *IndexBackedBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) { - logbs.Debugw("Has called", "cid", c) + logbs.Debugw("Has", "cid", c) - // if there is a shard that can serve the retrieval for the given cid, we have the requested cid - // and has should return true. + // Get shards that contain the cid's hash shards, err := ro.d.ShardsContainingMultihash(ctx, c.Hash()) if err != nil { logbs.Debugw("Has error", "cid", c, "err", err) return false, nil } if len(shards) == 0 { - logbs.Debugw("Has: returning false no error", "cid", c) + logbs.Debugw("Has: returning false", "cid", c) return false, nil } + // Check if there is a shard with the block that is not filtered out by + // the shard selection function _, err = ro.shardSelectF(c, shards) - if err != nil && err == ErrNoShardSelected { - logbs.Debugw("Has error", "cid", c, "err", err) + if err != nil && errors.Is(err, ErrNoShardSelected) { + logbs.Debugw("Has: returning false", "cid", c) return false, nil } if err != nil { + err = fmt.Errorf("failed to run shard selection function: %w", err) logbs.Debugw("Has error", "cid", c, "err", err) - return false, fmt.Errorf("failed to run shard selection function: %w", err) + return false, err } logbs.Debugw("Has: returning true", "cid", c) return true, nil } -func (ro *IndexBackedBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) { - logbs.Debugw("GetSize called", "cid", c) - - blk, err := ro.Get(ctx, c) - if err != nil { - logbs.Debugw("GetSize error", "cid", c, "err", err) - return 0, fmt.Errorf("failed to get block: %w", err) - } - - logbs.Debugw("GetSize success", "cid", c) - return len(blk.RawData()), nil -} - -func (ro *IndexBackedBlockstore) readFromBSCacheUnlocked(ctx context.Context, c cid.Cid, shardContainingCid shard.Key) (blocks.Block, error) { - // We've already ensured that the given shard has the cid/multihash we are looking for. - val, ok := ro.blockstoreCache.Get(shardContainingCid) - if !ok { - return nil, ErrBlockNotFound - } - - rbs := val.(*accessorWithBlockstore).bs - blk, err := rbs.Get(ctx, c) - if err != nil { - // we know that the cid we want to lookup belongs to a shard with key `sk` and - // so if we fail to get the corresponding block from the blockstore for that shards, something has gone wrong - // and we should remove the blockstore for that shard from our cache. - ro.blockstoreCache.Remove(shardContainingCid) - return nil, err - } - - return blk, nil -} - -func shardKeyToStriped(sk shard.Key) byte { - return sk.String()[len(sk.String())-1] -} - // --- UNSUPPORTED BLOCKSTORE METHODS ------- func (ro *IndexBackedBlockstore) DeleteBlock(context.Context, cid.Cid) error { return errors.New("unsupported operation DeleteBlock") diff --git a/indexbs/indexbacked_bs_test.go b/indexbs/indexbacked_bs_test.go index ba06581..c1c1906 100644 --- a/indexbs/indexbacked_bs_test.go +++ b/indexbs/indexbacked_bs_test.go @@ -101,9 +101,11 @@ func TestReadOnlyBs(t *testing.T) { require.NoError(t, errg.Wait()) // ------------------------------------------ - // Now test with a shard selector that rejects everything and ensure we always see errors + // Test with a shard selector that returns an error and verify all methods + // return the error + rejectedErr := errors.New("rejected") fss := func(c cid.Cid, shards []shard.Key) (shard.Key, error) { - return shard.Key{}, errors.New("rejected") + return shard.Key{}, rejectedErr } rbs, err = NewIndexBackedBlockstore(dagst, fss, 10) @@ -112,19 +114,71 @@ func TestReadOnlyBs(t *testing.T) { c := cid.NewCidV1(cid.Raw, mh) has, err := rbs.Has(ctx, c) - require.Error(t, err) + require.ErrorIs(t, err, rejectedErr) require.False(t, has) blk, err := rbs.Get(ctx, c) - require.Error(t, err) + require.ErrorIs(t, err, rejectedErr) require.Empty(t, blk) sz, err := rbs.GetSize(ctx, c) - require.Error(t, err) + require.ErrorIs(t, err, rejectedErr) require.EqualValues(t, 0, sz) return nil }) + + // ------------------------------------------ + // Test with a shard selector that returns ErrNoShardSelected + fss = func(c cid.Cid, shards []shard.Key) (shard.Key, error) { + return shard.Key{}, ErrNoShardSelected + } + + rbs, err = NewIndexBackedBlockstore(dagst, fss, 10) + require.NoError(t, err) + it.ForEach(func(mh multihash.Multihash, u uint64) error { + c := cid.NewCidV1(cid.Raw, mh) + + // Has should return false + has, err := rbs.Has(ctx, c) + require.NoError(t, err) + require.False(t, has) + + // Get should return ErrBlockNotFound + blk, err := rbs.Get(ctx, c) + require.ErrorIs(t, err, ErrBlockNotFound) + require.Empty(t, blk) + + // GetSize should return ErrBlockNotFound + sz, err := rbs.GetSize(ctx, c) + require.ErrorIs(t, err, ErrBlockNotFound) + require.EqualValues(t, 0, sz) + + return nil + }) + + // ------------------------------------------ + // Test with a cid that isn't in the shard + notFoundCid, err := cid.Parse("bafzbeigai3eoy2ccc7ybwjfz5r3rdxqrinwi4rwytly24tdbh6yk7zslrm") + require.NoError(t, err) + + rbs, err = NewIndexBackedBlockstore(dagst, noOpSelector, 10) + require.NoError(t, err) + + // Has should return false + has, err := rbs.Has(ctx, notFoundCid) + require.NoError(t, err) + require.False(t, has) + + // Get should return ErrBlockNotFound + blk, err := rbs.Get(ctx, notFoundCid) + require.ErrorIs(t, err, ErrBlockNotFound) + require.Empty(t, blk) + + // GetSize should return ErrBlockNotFound + sz, err := rbs.GetSize(ctx, notFoundCid) + require.ErrorIs(t, err, ErrBlockNotFound) + require.EqualValues(t, 0, sz) } func testRegistry(t *testing.T) *mount.Registry { From 0b07b8fa6242a27484613f5a0d59fc933270fd42 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 12 Sep 2022 10:57:30 +0200 Subject: [PATCH 15/18] refactor: simplify locking in index-backed blockstore --- go.mod | 2 +- go.sum | 2 - indexbs/indexbacked_bs.go | 92 ++++++++++----------------------------- 3 files changed, 25 insertions(+), 71 deletions(-) diff --git a/go.mod b/go.mod index de9b4f9..27b30fb 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/filecoin-project/dagstore go 1.16 require ( - github.com/hnlq715/golang-lru v0.3.0 + github.com/hashicorp/golang-lru v0.5.4 github.com/ipfs/go-block-format v0.0.3 github.com/ipfs/go-blockservice v0.4.0 // indirect github.com/ipfs/go-cid v0.1.0 diff --git a/go.sum b/go.sum index faf7710..ed84d5e 100644 --- a/go.sum +++ b/go.sum @@ -249,8 +249,6 @@ github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= -github.com/hnlq715/golang-lru v0.3.0 h1:eJtRD3bIw/dxwha16+urdY7bGfoCy/fAM+A/gahvYJM= -github.com/hnlq715/golang-lru v0.3.0/go.mod h1:RBkgDAtlu0SgTPvpb4VW2/RQnkCBMRD3Lr6B9RhsAS8= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= github.com/huin/goupnp v1.0.0 h1:wg75sLpL6DZqwHQN6E1Cfk6mtfzS45z8OV+ic+DtHRo= diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index 98b6e9a..90f114e 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -4,12 +4,10 @@ import ( "context" "errors" "fmt" - "sync" - "time" "github.com/filecoin-project/dagstore" "github.com/filecoin-project/dagstore/shard" - lru "github.com/hnlq715/golang-lru" + lru "github.com/hashicorp/golang-lru" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" "github.com/ipfs/go-datastore" @@ -40,7 +38,6 @@ type IndexBackedBlockstore struct { d dagstore.Interface shardSelectF ShardSelectorF - bsStripedLocks [256]sync.Mutex // caches the blockstore for a given shard for shard read affinity // i.e. further reads will likely be from the same shard. Maps (shard key -> blockstore). blockstoreCache *lru.Cache @@ -128,21 +125,26 @@ func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op Block // Do we have a cached blockstore for a shard containing the required block? // If so, call op on the cached blockstore. for _, sk := range shards { - // Use a striped lock to synchronize between this code that gets from - // the cache and the code below that adds to the cache - lk := &ro.bsStripedLocks[shardKeyToStriped(sk)] - lk.Lock() - res, err := ro.readFromBSCacheUnlocked(ctx, c, sk, op) - lk.Unlock() - if err == nil { - // Found a cached shard blockstore containing the required block, - // and successfully called the blockstore op - return res, nil + // Get the shard's blockstore from the cache + val, ok := ro.blockstoreCache.Get(sk) + if ok { + accessor := val.(*accessorWithBlockstore) + res, err := execOpOnBlockstore(ctx, c, sk, accessor.bs, op) + if err == nil { + // Found a cached shard blockstore containing the required block, + // and successfully called the blockstore op + return res, nil + } } } - // We don't have a cached blockstore for a shard that contains the block. - // Let's build one. + // We weren't able to get the block which means that either + // 1. There is no cached blockstore for a shard that contains the block + // 2. There was an error trying to get the block from the existing cached + // blockstore. + // ShardsContainingMultihash indicated that the shard has the block, so + // if there was an error getting it, it means there is something wrong. + // So in either case we should create a new blockstore for the shard. // Use the shard select function to select one of the shards with the block sk, err := ro.shardSelectF(c, shards) @@ -154,20 +156,9 @@ func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op Block return nil, fmt.Errorf("failed to run shard selection function: %w", err) } - // Synchronize between the code above that gets a blockstore from the cache - // and the code below that adds a blockstore to the cache - lk := &ro.bsStripedLocks[shardKeyToStriped(sk)] - lk.Lock() - defer lk.Unlock() - - // Check if another thread already added the shard's blockstore to the - // cache while this thread was waiting to obtain the lock - res, err := ro.readFromBSCacheUnlocked(ctx, c, sk, op) - if err == nil { - return res, nil - } - - // Load the blockstore for the selected shard + // Load the blockstore for the selected shard. + // Note that internally the DAG store will synchronize multiple concurrent + // acquires for the same shard. resch := make(chan dagstore.ShardResult, 1) if err := ro.d.AcquireShard(ctx, sk, resch, dagstore.AcquireOpts{}); err != nil { return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) @@ -188,42 +179,13 @@ func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op Block return nil, fmt.Errorf("failed to load read-only blockstore for shard %s: %w", sk, err) } - // Call the operation on the blockstore - res, err = execOpOnBlockstore(ctx, c, sk, bs, op) - if err != nil { - return nil, err - } - - // Update the blockstore cache + // Add the blockstore to the cache ro.blockstoreCache.Add(sk, &accessorWithBlockstore{sa, bs}) logbs.Debugw("Added new blockstore to cache", "cid", c, "shard", sk) - return res, nil -} -func (ro *IndexBackedBlockstore) readFromBSCacheUnlocked(ctx context.Context, c cid.Cid, shardContainingCid shard.Key, op BlockstoreOp) (*opRes, error) { - // Get the shard's blockstore from the cache - val, ok := ro.blockstoreCache.Get(shardContainingCid) - if !ok { - return nil, ErrBlockNotFound - } - - accessor := val.(*accessorWithBlockstore) - res, err := execOpOnBlockstore(ctx, c, shardContainingCid, accessor.bs, op) - if err == nil { - return res, nil - } - - // We know that the cid we want to lookup belongs to a shard with key `sk` and - // so if we fail to get the corresponding block from the blockstore for that shard, - // something has gone wrong and we should remove the blockstore for that shard from our cache. - // However there may be several calls from different threads waiting to acquire - // the blockstore from the cache, so to prevent flapping, set a short expiry on the - // cache key instead of removing it immediately. - logbs.Warnf("expected blockstore for shard %s to contain cid %s (multihash %s) but it did not", - shardContainingCid, c, c.Hash()) - ro.blockstoreCache.AddEx(shardContainingCid, accessor, time.Second) - return nil, err + // Call the operation on the blockstore + return execOpOnBlockstore(ctx, c, sk, bs, op) } func execOpOnBlockstore(ctx context.Context, c cid.Cid, sk shard.Key, bs dagstore.ReadBlockstore, op BlockstoreOp) (*opRes, error) { @@ -241,12 +203,6 @@ func execOpOnBlockstore(ctx context.Context, c cid.Cid, sk shard.Key, bs dagstor return &res, nil } -func shardKeyToStriped(sk shard.Key) byte { - // The shard key is typically a cid, so the last byte should be random. - // Use the last byte as as the striped lock index. - return sk.String()[len(sk.String())-1] -} - func (ro *IndexBackedBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) { logbs.Debugw("Has", "cid", c) From 74fd751687cd8cc628bfc6cfcbc2dc5618aca43d Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 13 Sep 2022 15:08:02 +0200 Subject: [PATCH 16/18] refactor: ref-count blockstore acquires so as to close exactly once --- indexbs/indexbacked_bs.go | 132 ++++++++++++++++++++++++++++++++------ 1 file changed, 112 insertions(+), 20 deletions(-) diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index 90f114e..d3d4dbb 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "sync" "github.com/filecoin-project/dagstore" "github.com/filecoin-project/dagstore/shard" @@ -28,11 +29,6 @@ var ErrNoShardSelected = errors.New("no shard selected") // It should return `ErrNoShardSelected` if none of the given shard is selected. type ShardSelectorF func(c cid.Cid, shards []shard.Key) (shard.Key, error) -type accessorWithBlockstore struct { - sa *dagstore.ShardAccessor - bs dagstore.ReadBlockstore -} - // IndexBackedBlockstore is a read only blockstore over all cids across all shards in the dagstore. type IndexBackedBlockstore struct { d dagstore.Interface @@ -40,24 +36,19 @@ type IndexBackedBlockstore struct { // caches the blockstore for a given shard for shard read affinity // i.e. further reads will likely be from the same shard. Maps (shard key -> blockstore). - blockstoreCache *lru.Cache + bsCache *blockstoreCache } func NewIndexBackedBlockstore(d dagstore.Interface, shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { - // instantiate the blockstore cache - bslru, err := lru.NewWithEvict(maxCacheSize, func(_ interface{}, val interface{}) { - // ensure we close the blockstore for a shard when it's evicted from the cache so dagstore can gc it. - abs := val.(*accessorWithBlockstore) - abs.sa.Close() - }) + cache, err := newBlockstoreCache(maxCacheSize) if err != nil { - return nil, fmt.Errorf("failed to create lru cache for read only blockstores") + return nil, err } return &IndexBackedBlockstore{ - d: d, - shardSelectF: shardSelector, - blockstoreCache: bslru, + d: d, + shardSelectF: shardSelector, + bsCache: cache, }, nil } @@ -126,10 +117,10 @@ func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op Block // If so, call op on the cached blockstore. for _, sk := range shards { // Get the shard's blockstore from the cache - val, ok := ro.blockstoreCache.Get(sk) + abs, ok := ro.bsCache.Get(sk) if ok { - accessor := val.(*accessorWithBlockstore) - res, err := execOpOnBlockstore(ctx, c, sk, accessor.bs, op) + res, err := execOpOnBlockstore(ctx, c, sk, abs.bs, op) + abs.close() if err == nil { // Found a cached shard blockstore containing the required block, // and successfully called the blockstore op @@ -180,7 +171,9 @@ func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op Block } // Add the blockstore to the cache - ro.blockstoreCache.Add(sk, &accessorWithBlockstore{sa, bs}) + abs := &accessorWithBlockstore{sa: sa, bs: bs} + ro.bsCache.Add(sk, abs) + defer abs.close() logbs.Debugw("Added new blockstore to cache", "cid", c, "shard", sk) @@ -248,3 +241,102 @@ func (ro *IndexBackedBlockstore) PutMany(context.Context, []blocks.Block) error func (ro *IndexBackedBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { return nil, errors.New("unsupported operation AllKeysChan") } + +type blockstoreCache struct { + lk sync.Mutex + cache *lru.Cache +} + +func newBlockstoreCache(size int) (*blockstoreCache, error) { + bslru, err := lru.NewWithEvict(size, func(_ interface{}, val interface{}) { + abs := val.(*accessorWithBlockstore) + abs.evict() + }) + if err != nil { + return nil, fmt.Errorf("failed to create lru cache for read only blockstores: %w", err) + } + + return &blockstoreCache{cache: bslru}, nil +} + +func (bc *blockstoreCache) Get(sk shard.Key) (*accessorWithBlockstore, bool) { + bc.lk.Lock() + defer bc.lk.Unlock() + + // Get the accessor from the cache + absi, ok := bc.cache.Get(sk) + if !ok { + return nil, false + } + + // Increment the accessor's ref count so that the blockstore + // will not be closed until the caller is finished with it + abs := absi.(*accessorWithBlockstore) + abs.incRefCount() + return abs, true +} + +func (bc *blockstoreCache) Add(sk shard.Key, abs *accessorWithBlockstore) { + bc.lk.Lock() + defer bc.lk.Unlock() + + // Check if we're replacing an existing accessor with this Add + absi, ok := bc.cache.Get(sk) + if ok { + // Mark the existing accessor as evicted so that its blockstore can be + // closed once all callers are done with the blockstore + abs := absi.(*accessorWithBlockstore) + abs.evict() + } + + // Add the new accessor + bc.cache.Add(sk, abs) + abs.incRefCount() +} + +type accessorWithBlockstore struct { + sa *dagstore.ShardAccessor + bs dagstore.ReadBlockstore + + lk sync.Mutex + evicted bool + refCount int +} + +func (abs *accessorWithBlockstore) incRefCount() { + abs.lk.Lock() + defer abs.lk.Unlock() + + abs.refCount++ +} + +func (abs *accessorWithBlockstore) close() { + abs.lk.Lock() + defer abs.lk.Unlock() + + abs.refCount-- + if abs.refCount == 0 && abs.evicted { + // The blockstore has already been evicted, and this was the last + // reference to it, so close the blockstore so that dagstore can GC it + err := abs.sa.Close() + if err != nil { + logbs.Warnf("error closing blockstore: %w", err) + } + } +} + +func (abs *accessorWithBlockstore) evict() { + abs.lk.Lock() + defer abs.lk.Unlock() + + abs.evicted = true + + if abs.refCount == 0 { + // There are no more references to the blockstore; close it so that the + // dagstore can GC it + err := abs.sa.Close() + if err != nil { + logbs.Warnf("error closing blockstore: %w", err) + } + } +} From 194d8cf2ee043c3450db4f1178e1d8129dd058b7 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 16 Sep 2022 15:13:28 +0200 Subject: [PATCH 17/18] feat: index-backed blockstore - synchronize acquires --- indexbs/indexbacked_bs.go | 231 ++++++++++++++------------------------ 1 file changed, 85 insertions(+), 146 deletions(-) diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index d3d4dbb..0693745 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -29,6 +29,17 @@ var ErrNoShardSelected = errors.New("no shard selected") // It should return `ErrNoShardSelected` if none of the given shard is selected. type ShardSelectorF func(c cid.Cid, shards []shard.Key) (shard.Key, error) +type accessorWithBlockstore struct { + sa *dagstore.ShardAccessor + bs dagstore.ReadBlockstore +} + +type blockstoreAcquire struct { + once sync.Once + bs dagstore.ReadBlockstore + err error +} + // IndexBackedBlockstore is a read only blockstore over all cids across all shards in the dagstore. type IndexBackedBlockstore struct { d dagstore.Interface @@ -36,19 +47,26 @@ type IndexBackedBlockstore struct { // caches the blockstore for a given shard for shard read affinity // i.e. further reads will likely be from the same shard. Maps (shard key -> blockstore). - bsCache *blockstoreCache + blockstoreCache *lru.Cache + // used to manage concurrent acquisition of shards by multiple threads + bsAcquireByShard sync.Map } func NewIndexBackedBlockstore(d dagstore.Interface, shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { - cache, err := newBlockstoreCache(maxCacheSize) + // instantiate the blockstore cache + bslru, err := lru.NewWithEvict(maxCacheSize, func(_ interface{}, val interface{}) { + // ensure we close the blockstore for a shard when it's evicted from the cache so dagstore can gc it. + abs := val.(*accessorWithBlockstore) + abs.sa.Close() + }) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to create lru cache for read only blockstores") } return &IndexBackedBlockstore{ - d: d, - shardSelectF: shardSelector, - bsCache: cache, + d: d, + shardSelectF: shardSelector, + blockstoreCache: bslru, }, nil } @@ -117,25 +135,22 @@ func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op Block // If so, call op on the cached blockstore. for _, sk := range shards { // Get the shard's blockstore from the cache - abs, ok := ro.bsCache.Get(sk) + val, ok := ro.blockstoreCache.Get(sk) if ok { - res, err := execOpOnBlockstore(ctx, c, sk, abs.bs, op) - abs.close() - if err == nil { - // Found a cached shard blockstore containing the required block, - // and successfully called the blockstore op - return res, nil + accessor := val.(*accessorWithBlockstore) + res, err := execOpOnBlockstore(ctx, c, sk, accessor.bs, op) + if err != nil { + return nil, err } + + // Found a cached blockstore containing the required block, + // and successfully called the blockstore op + return res, nil } } - // We weren't able to get the block which means that either - // 1. There is no cached blockstore for a shard that contains the block - // 2. There was an error trying to get the block from the existing cached - // blockstore. - // ShardsContainingMultihash indicated that the shard has the block, so - // if there was an error getting it, it means there is something wrong. - // So in either case we should create a new blockstore for the shard. + // We weren't able to find a cached blockstore for a shard that contains + // the block. Create a new blockstore for the shard. // Use the shard select function to select one of the shards with the block sk, err := ro.shardSelectF(c, shards) @@ -147,38 +162,61 @@ func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op Block return nil, fmt.Errorf("failed to run shard selection function: %w", err) } - // Load the blockstore for the selected shard. - // Note that internally the DAG store will synchronize multiple concurrent - // acquires for the same shard. - resch := make(chan dagstore.ShardResult, 1) - if err := ro.d.AcquireShard(ctx, sk, resch, dagstore.AcquireOpts{}); err != nil { - return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) - } - var shres dagstore.ShardResult - select { - case <-ctx.Done(): - return nil, ctx.Err() - case shres = <-resch: - if shres.Error != nil { - return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, shres.Error) - } - } + // Some retrieval patterns will result in multiple threads fetching blocks + // from the same piece concurrently. In that case many threads may attempt + // to create a blockstore over the same piece. Use a sync.Once to ensure + // that the blockstore is only created once for all threads waiting on the + // same shard. + bsAcquireI, _ := ro.bsAcquireByShard.LoadOrStore(sk, &blockstoreAcquire{}) + bsAcquire := bsAcquireI.(*blockstoreAcquire) + bsAcquire.once.Do(func() { + bsAcquire.bs, bsAcquire.err = func() (dagstore.ReadBlockstore, error) { + // Check if the blockstore was created by another thread while this + // thread was waiting to enter the sync.Once + val, ok := ro.blockstoreCache.Get(sk) + if ok { + return val.(dagstore.ReadBlockstore), nil + } - sa := shres.Accessor - bs, err := sa.Blockstore() - if err != nil { - return nil, fmt.Errorf("failed to load read-only blockstore for shard %s: %w", sk, err) - } + // Acquire the blockstore for the selected shard + resch := make(chan dagstore.ShardResult, 1) + if err := ro.d.AcquireShard(ctx, sk, resch, dagstore.AcquireOpts{}); err != nil { + return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) + } + var shres dagstore.ShardResult + select { + case <-ctx.Done(): + return nil, ctx.Err() + case shres = <-resch: + if shres.Error != nil { + return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, shres.Error) + } + } + + sa := shres.Accessor + bs, err := sa.Blockstore() + if err != nil { + return nil, fmt.Errorf("failed to load read-only blockstore for shard %s: %w", sk, err) + } + + // Add the blockstore to the cache + ro.blockstoreCache.Add(sk, &accessorWithBlockstore{sa, bs}) + + logbs.Debugw("Added new blockstore to cache", "cid", c, "shard", sk) + + return bs, nil + }() - // Add the blockstore to the cache - abs := &accessorWithBlockstore{sa: sa, bs: bs} - ro.bsCache.Add(sk, abs) - defer abs.close() + // The sync.Once has completed so clean up the acquire entry for this shard + ro.bsAcquireByShard.Delete(sk) + }) - logbs.Debugw("Added new blockstore to cache", "cid", c, "shard", sk) + if bsAcquire.err != nil { + return nil, bsAcquire.err + } // Call the operation on the blockstore - return execOpOnBlockstore(ctx, c, sk, bs, op) + return execOpOnBlockstore(ctx, c, sk, bsAcquire.bs, op) } func execOpOnBlockstore(ctx context.Context, c cid.Cid, sk shard.Key, bs dagstore.ReadBlockstore, op BlockstoreOp) (*opRes, error) { @@ -241,102 +279,3 @@ func (ro *IndexBackedBlockstore) PutMany(context.Context, []blocks.Block) error func (ro *IndexBackedBlockstore) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { return nil, errors.New("unsupported operation AllKeysChan") } - -type blockstoreCache struct { - lk sync.Mutex - cache *lru.Cache -} - -func newBlockstoreCache(size int) (*blockstoreCache, error) { - bslru, err := lru.NewWithEvict(size, func(_ interface{}, val interface{}) { - abs := val.(*accessorWithBlockstore) - abs.evict() - }) - if err != nil { - return nil, fmt.Errorf("failed to create lru cache for read only blockstores: %w", err) - } - - return &blockstoreCache{cache: bslru}, nil -} - -func (bc *blockstoreCache) Get(sk shard.Key) (*accessorWithBlockstore, bool) { - bc.lk.Lock() - defer bc.lk.Unlock() - - // Get the accessor from the cache - absi, ok := bc.cache.Get(sk) - if !ok { - return nil, false - } - - // Increment the accessor's ref count so that the blockstore - // will not be closed until the caller is finished with it - abs := absi.(*accessorWithBlockstore) - abs.incRefCount() - return abs, true -} - -func (bc *blockstoreCache) Add(sk shard.Key, abs *accessorWithBlockstore) { - bc.lk.Lock() - defer bc.lk.Unlock() - - // Check if we're replacing an existing accessor with this Add - absi, ok := bc.cache.Get(sk) - if ok { - // Mark the existing accessor as evicted so that its blockstore can be - // closed once all callers are done with the blockstore - abs := absi.(*accessorWithBlockstore) - abs.evict() - } - - // Add the new accessor - bc.cache.Add(sk, abs) - abs.incRefCount() -} - -type accessorWithBlockstore struct { - sa *dagstore.ShardAccessor - bs dagstore.ReadBlockstore - - lk sync.Mutex - evicted bool - refCount int -} - -func (abs *accessorWithBlockstore) incRefCount() { - abs.lk.Lock() - defer abs.lk.Unlock() - - abs.refCount++ -} - -func (abs *accessorWithBlockstore) close() { - abs.lk.Lock() - defer abs.lk.Unlock() - - abs.refCount-- - if abs.refCount == 0 && abs.evicted { - // The blockstore has already been evicted, and this was the last - // reference to it, so close the blockstore so that dagstore can GC it - err := abs.sa.Close() - if err != nil { - logbs.Warnf("error closing blockstore: %w", err) - } - } -} - -func (abs *accessorWithBlockstore) evict() { - abs.lk.Lock() - defer abs.lk.Unlock() - - abs.evicted = true - - if abs.refCount == 0 { - // There are no more references to the blockstore; close it so that the - // dagstore can GC it - err := abs.sa.Close() - if err != nil { - logbs.Warnf("error closing blockstore: %w", err) - } - } -} From 5c8caacf7b0a019385e057e3ea4f2f012418cd8d Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 19 Sep 2022 15:05:23 +0200 Subject: [PATCH 18/18] refactor: use global context for Acquireshard --- indexbs/indexbacked_bs.go | 11 ++++++++--- indexbs/indexbacked_bs_test.go | 8 ++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/indexbs/indexbacked_bs.go b/indexbs/indexbacked_bs.go index 0693745..74166c7 100644 --- a/indexbs/indexbacked_bs.go +++ b/indexbs/indexbacked_bs.go @@ -42,6 +42,7 @@ type blockstoreAcquire struct { // IndexBackedBlockstore is a read only blockstore over all cids across all shards in the dagstore. type IndexBackedBlockstore struct { + ctx context.Context d dagstore.Interface shardSelectF ShardSelectorF @@ -52,10 +53,13 @@ type IndexBackedBlockstore struct { bsAcquireByShard sync.Map } -func NewIndexBackedBlockstore(d dagstore.Interface, shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { +func NewIndexBackedBlockstore(ctx context.Context, d dagstore.Interface, shardSelector ShardSelectorF, maxCacheSize int) (blockstore.Blockstore, error) { // instantiate the blockstore cache bslru, err := lru.NewWithEvict(maxCacheSize, func(_ interface{}, val interface{}) { - // ensure we close the blockstore for a shard when it's evicted from the cache so dagstore can gc it. + // Ensure we close the blockstore for a shard when it's evicted from + // the cache so dagstore can gc it. + // TODO: add reference counting mechanism so that the blockstore does + // not get closed while there is an operation still in progress against it abs := val.(*accessorWithBlockstore) abs.sa.Close() }) @@ -64,6 +68,7 @@ func NewIndexBackedBlockstore(d dagstore.Interface, shardSelector ShardSelectorF } return &IndexBackedBlockstore{ + ctx: ctx, d: d, shardSelectF: shardSelector, blockstoreCache: bslru, @@ -180,7 +185,7 @@ func (ro *IndexBackedBlockstore) execOp(ctx context.Context, c cid.Cid, op Block // Acquire the blockstore for the selected shard resch := make(chan dagstore.ShardResult, 1) - if err := ro.d.AcquireShard(ctx, sk, resch, dagstore.AcquireOpts{}); err != nil { + if err := ro.d.AcquireShard(ro.ctx, sk, resch, dagstore.AcquireOpts{}); err != nil { return nil, fmt.Errorf("failed to acquire shard %s: %w", sk, err) } var shres dagstore.ShardResult diff --git a/indexbs/indexbacked_bs_test.go b/indexbs/indexbacked_bs_test.go index c1c1906..08972aa 100644 --- a/indexbs/indexbacked_bs_test.go +++ b/indexbs/indexbacked_bs_test.go @@ -47,7 +47,7 @@ func TestReadOnlyBs(t *testing.T) { res := <-ch require.NoError(t, res.Error) - rbs, err := NewIndexBackedBlockstore(dagst, noOpSelector, 10) + rbs, err := NewIndexBackedBlockstore(ctx, dagst, noOpSelector, 10) require.NoError(t, err) // iterate over the CARV2 Index for the given CARv2 file and ensure the readonly blockstore @@ -108,7 +108,7 @@ func TestReadOnlyBs(t *testing.T) { return shard.Key{}, rejectedErr } - rbs, err = NewIndexBackedBlockstore(dagst, fss, 10) + rbs, err = NewIndexBackedBlockstore(ctx, dagst, fss, 10) require.NoError(t, err) it.ForEach(func(mh multihash.Multihash, u uint64) error { c := cid.NewCidV1(cid.Raw, mh) @@ -134,7 +134,7 @@ func TestReadOnlyBs(t *testing.T) { return shard.Key{}, ErrNoShardSelected } - rbs, err = NewIndexBackedBlockstore(dagst, fss, 10) + rbs, err = NewIndexBackedBlockstore(ctx, dagst, fss, 10) require.NoError(t, err) it.ForEach(func(mh multihash.Multihash, u uint64) error { c := cid.NewCidV1(cid.Raw, mh) @@ -162,7 +162,7 @@ func TestReadOnlyBs(t *testing.T) { notFoundCid, err := cid.Parse("bafzbeigai3eoy2ccc7ybwjfz5r3rdxqrinwi4rwytly24tdbh6yk7zslrm") require.NoError(t, err) - rbs, err = NewIndexBackedBlockstore(dagst, noOpSelector, 10) + rbs, err = NewIndexBackedBlockstore(ctx, dagst, noOpSelector, 10) require.NoError(t, err) // Has should return false