Skip to content

Commit

Permalink
Rework Block and Index cache (#1473) (#1476)
Browse files Browse the repository at this point in the history
This PR separates the block cache and the index cache. Earlier we had a single cache that mixed both kinds of entries.
Block Cache -> Stores blocks. Should be used when running badger with compression/encryption
Index Cache -> Used to limit the memory used by table indices and bloom filters.

This PR also removes the `KeepBlocksInCache` and `KeepBlockIndicesInCache` options.

(cherry picked from commit c3b1582)
  • Loading branch information
Ibrahim Jarif authored Aug 26, 2020
1 parent 6d05358 commit 7d288ec
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 198 deletions.
38 changes: 19 additions & 19 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ type DB struct {
pub *publisher
registry *KeyRegistry
blockCache *ristretto.Cache
bfCache *ristretto.Cache
indexCache *ristretto.Cache
}

const (
Expand Down Expand Up @@ -311,11 +311,11 @@ func Open(opt Options) (db *DB, err error) {
}
}()

if opt.MaxCacheSize > 0 {
if opt.BlockCacheSize > 0 {
config := ristretto.Config{
// Use 5% of cache memory for storing counters.
NumCounters: int64(float64(opt.MaxCacheSize) * 0.05 * 2),
MaxCost: int64(float64(opt.MaxCacheSize) * 0.95),
NumCounters: int64(float64(opt.BlockCacheSize) * 0.05 * 2),
MaxCost: int64(float64(opt.BlockCacheSize) * 0.95),
BufferItems: 64,
Metrics: true,
}
Expand All @@ -325,15 +325,15 @@ func Open(opt Options) (db *DB, err error) {
}
}

if opt.MaxBfCacheSize > 0 {
if opt.IndexCacheSize > 0 {
config := ristretto.Config{
// Use 5% of cache memory for storing counters.
NumCounters: int64(float64(opt.MaxBfCacheSize) * 0.05 * 2),
MaxCost: int64(float64(opt.MaxBfCacheSize) * 0.95),
NumCounters: int64(float64(opt.IndexCacheSize) * 0.05 * 2),
MaxCost: int64(float64(opt.IndexCacheSize) * 0.95),
BufferItems: 64,
Metrics: true,
}
db.bfCache, err = ristretto.NewCache(&config)
db.indexCache, err = ristretto.NewCache(&config)
if err != nil {
return nil, errors.Wrap(err, "failed to create bf cache")
}
Expand Down Expand Up @@ -427,7 +427,7 @@ func Open(opt Options) (db *DB, err error) {
// cleanup goroutines in case of an error.
func (db *DB) cleanup() {
db.blockCache.Close()
db.bfCache.Close()
db.indexCache.Close()
db.stopMemoryFlush()
db.stopCompactions()

Expand All @@ -448,18 +448,18 @@ func (db *DB) cleanup() {
db.vlog.Close()
}

// DataCacheMetrics returns the metrics for the underlying data cache.
func (db *DB) DataCacheMetrics() *ristretto.Metrics {
// BlockCacheMetrics returns the metrics for the underlying block cache.
func (db *DB) BlockCacheMetrics() *ristretto.Metrics {
if db.blockCache != nil {
return db.blockCache.Metrics
}
return nil
}

// BfCacheMetrics returns the metrics for the underlying bloom filter cache.
func (db *DB) BfCacheMetrics() *ristretto.Metrics {
if db.bfCache != nil {
return db.bfCache.Metrics
// IndexCacheMetrics returns the metrics for the underlying index cache.
func (db *DB) IndexCacheMetrics() *ristretto.Metrics {
if db.indexCache != nil {
return db.indexCache.Metrics
}
return nil
}
Expand Down Expand Up @@ -553,7 +553,7 @@ func (db *DB) close() (err error) {
db.closers.updateSize.SignalAndWait()
db.orc.Stop()
db.blockCache.Close()
db.bfCache.Close()
db.indexCache.Close()

if db.opt.InMemory {
return
Expand Down Expand Up @@ -1042,8 +1042,8 @@ func (db *DB) handleFlushTask(ft flushTask) error {
bopts := buildTableOptions(db.opt)
bopts.DataKey = dk
// Builder does not need cache but the same options are used for opening table.
bopts.Cache = db.blockCache
bopts.BfCache = db.bfCache
bopts.BlockCache = db.blockCache
bopts.IndexCache = db.indexCache
tableData := buildL0Table(ft, bopts)

fileID := db.lc.reserveFileID()
Expand Down Expand Up @@ -1626,7 +1626,7 @@ func (db *DB) dropAll() (func(), error) {
db.lc.nextFileID = 1
db.opt.Infof("Deleted %d value log files. DropAll done.\n", num)
db.blockCache.Clear()
db.bfCache.Clear()
db.indexCache.Clear()

return resume, nil
}
Expand Down
13 changes: 6 additions & 7 deletions db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func getTestOptions(dir string) Options {
WithMaxTableSize(1 << 15). // Force more compaction.
WithLevelOneSize(4 << 15). // Force more compaction.
WithSyncWrites(false).
WithMaxCacheSize(10 << 20)
WithBlockCacheSize(10 << 20)
if !*mmap {
return opt.WithValueLogLoadingMode(options.FileIO)
}
Expand Down Expand Up @@ -287,12 +287,11 @@ func TestGet(t *testing.T) {
test(t, db)
require.NoError(t, db.Close())
})
t.Run("cache disabled", func(t *testing.T) {
opts := DefaultOptions("").WithInMemory(true).WithMaxCacheSize(0)
db, err := Open(opts)
require.NoError(t, err)
test(t, db)
require.NoError(t, db.Close())
t.Run("cache enabled", func(t *testing.T) {
opts := DefaultOptions("").WithBlockCacheSize(10 << 20)
runBadgerTest(t, &opts, func(t *testing.T, db *DB) {
test(t, db)
})
})
}

Expand Down
7 changes: 4 additions & 3 deletions iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ func TestIteratePrefix(t *testing.T) {
t.Run("With Block Offsets in Cache", func(t *testing.T) {
t.Parallel()
opts := getTestOptions("")
opts = opts.WithKeepBlockIndicesInCache(true)
opts.IndexCacheSize = 100 << 20
runBadgerTest(t, &opts, func(t *testing.T, db *DB) {
testIteratorPrefix(t, db)
})
Expand All @@ -219,7 +219,8 @@ func TestIteratePrefix(t *testing.T) {
t.Run("With Block Offsets and Blocks in Cache", func(t *testing.T) {
t.Parallel()
opts := getTestOptions("")
opts = opts.WithKeepBlockIndicesInCache(true).WithKeepBlocksInCache(true)
opts.BlockCacheSize = 100 << 20
opts.IndexCacheSize = 100 << 20
runBadgerTest(t, &opts, func(t *testing.T, db *DB) {
testIteratorPrefix(t, db)
})
Expand All @@ -228,7 +229,7 @@ func TestIteratePrefix(t *testing.T) {
t.Run("With Blocks in Cache", func(t *testing.T) {
t.Parallel()
opts := getTestOptions("")
opts = opts.WithKeepBlocksInCache(true)
opts.BlockCacheSize = 100 << 20
runBadgerTest(t, &opts, func(t *testing.T, db *DB) {
testIteratorPrefix(t, db)
})
Expand Down
8 changes: 4 additions & 4 deletions levels.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ func newLevelsController(db *DB, mf *Manifest) (*levelsController, error) {
// Set compression from table manifest.
topt.Compression = tf.Compression
topt.DataKey = dk
topt.Cache = db.blockCache
topt.BfCache = db.bfCache
topt.BlockCache = db.blockCache
topt.IndexCache = db.indexCache
t, err := table.OpenTable(fd, topt)
if err != nil {
if strings.HasPrefix(err.Error(), "CHECKSUM_MISMATCH:") {
Expand Down Expand Up @@ -569,8 +569,8 @@ nextTable:
bopts := buildTableOptions(s.kv.opt)
bopts.DataKey = dk
// Builder does not need cache but the same options are used for opening table.
bopts.Cache = s.kv.blockCache
bopts.BfCache = s.kv.bfCache
bopts.BlockCache = s.kv.blockCache
bopts.IndexCache = s.kv.indexCache
builder := table.NewTableBuilder(bopts)
var numKeys, numSkips uint64
for ; it.Valid(); it.Next() {
Expand Down
122 changes: 40 additions & 82 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ type Options struct {
BlockSize int
BloomFalsePositive float64
KeepL0InMemory bool
MaxCacheSize int64
MaxBfCacheSize int64
BlockCacheSize int64
IndexCacheSize int64
LoadBloomsOnOpen bool

NumLevelZeroTables int
Expand Down Expand Up @@ -98,12 +98,6 @@ type Options struct {
// conflict detection is disabled.
DetectConflicts bool

// KeepBlockIndicesInCache decides whether to keep the block offsets in the cache or not.
KeepBlockIndicesInCache bool

// KeepBlocksInCache decides whether to keep the sst blocks in the cache or not.
KeepBlocksInCache bool

// Transaction start and commit timestamps are managed by end-user.
// This is only useful for databases built on top of Badger (like Dgraph).
// Not recommended for most users.
Expand Down Expand Up @@ -141,9 +135,10 @@ func DefaultOptions(path string) Options {
KeepL0InMemory: false,
VerifyValueChecksum: false,
Compression: options.None,
MaxCacheSize: 0,
MaxBfCacheSize: 0,
BlockCacheSize: 0,
IndexCacheSize: 0,
LoadBloomsOnOpen: true,

// The following benchmarks were done on a 4 KB block size (default block size). The
// compression is ratio supposed to increase with increasing compression level but since the
// input for compression algorithm is small (4 KB), we don't get significant benefit at
Expand All @@ -169,22 +164,18 @@ func DefaultOptions(path string) Options {
EncryptionKey: []byte{},
EncryptionKeyRotationDuration: 10 * 24 * time.Hour, // Default 10 days.
DetectConflicts: true,
KeepBlocksInCache: false,
KeepBlockIndicesInCache: false,
}
}

func buildTableOptions(opt Options) table.Options {
return table.Options{
BlockSize: opt.BlockSize,
BloomFalsePositive: opt.BloomFalsePositive,
LoadBloomsOnOpen: opt.LoadBloomsOnOpen,
LoadingMode: opt.TableLoadingMode,
ChkMode: opt.ChecksumVerificationMode,
Compression: opt.Compression,
ZSTDCompressionLevel: opt.ZSTDCompressionLevel,
KeepBlockIndicesInCache: opt.KeepBlockIndicesInCache,
KeepBlocksInCache: opt.KeepBlocksInCache,
BlockSize: opt.BlockSize,
BloomFalsePositive: opt.BloomFalsePositive,
LoadBloomsOnOpen: opt.LoadBloomsOnOpen,
LoadingMode: opt.TableLoadingMode,
ChkMode: opt.ChecksumVerificationMode,
Compression: opt.Compression,
ZSTDCompressionLevel: opt.ZSTDCompressionLevel,
}
}

Expand Down Expand Up @@ -500,7 +491,7 @@ func (opt Options) WithEncryptionKey(key []byte) Options {
return opt
}

// WithEncryptionRotationDuration returns new Options value with the duration set to
// WithEncryptionKeyRotationDuration returns new Options value with the duration set to
// the given value.
//
// Key Registry will use this duration to create new keys. If the previous generated
Expand Down Expand Up @@ -559,18 +550,18 @@ func (opt Options) WithChecksumVerificationMode(cvMode options.ChecksumVerificat
return opt
}

// WithMaxCacheSize returns a new Options value with MaxCacheSize set to the given value.
// WithBlockCacheSize returns a new Options value with BlockCacheSize set to the given value.
//
// This value specifies how much data cache should hold in memory. A small size of cache means lower
// memory consumption and lookups/iterations would take longer.
// It is recommended to use a cache if you're using compression or encryption.
// This value specifies how much data cache should hold in memory. A small size
// of cache means lower memory consumption and lookups/iterations would take
// longer. It is recommended to use a cache if you're using compression or encryption.
// If compression and encryption both are disabled, adding a cache will lead to
// unnecessary overhead which will affect the read performance. Setting size to zero disables the
// cache altogether.
// unnecessary overhead which will affect the read performance. Setting size to
// zero disables the cache altogether.
//
// Default value of MaxCacheSize is zero.
func (opt Options) WithMaxCacheSize(size int64) Options {
opt.MaxCacheSize = size
// Default value of BlockCacheSize is zero.
func (opt Options) WithBlockCacheSize(size int64) Options {
opt.BlockCacheSize = size
return opt
}

Expand Down Expand Up @@ -619,22 +610,6 @@ func (opt Options) WithBypassLockGuard(b bool) Options {
return opt
}

// WithMaxBfCacheSize returns a new Options value with MaxBfCacheSize set to the given value.
//
// This value specifies how much memory should be used by the bloom filters.
// Badger uses bloom filters to speed up lookups. Each table has its own bloom
// filter and each bloom filter is approximately of 5 MB.
//
// Zero value for BfCacheSize means all the bloom filters will be kept in
// memory and the cache is disabled.
//
// The default value of MaxBfCacheSize is 0 which means all bloom filters will
// be kept in memory.
func (opt Options) WithMaxBfCacheSize(size int64) Options {
opt.MaxBfCacheSize = size
return opt
}

// WithLoadBloomsOnOpen returns a new Options value with LoadBloomsOnOpen set to the given value.
//
// Badger uses bloom filters to speed up key lookups. When LoadBloomsOnOpen is set
Expand All @@ -648,6 +623,24 @@ func (opt Options) WithLoadBloomsOnOpen(b bool) Options {
return opt
}

// WithIndexCacheSize returns a new Options value with IndexCacheSize set to
// the given value.
//
// This value specifies how much memory should be used by table indices. These
// indices include the block offsets and the bloomfilters. Badger uses bloom
// filters to speed up lookups. Each table has its own bloom
// filter and each bloom filter is approximately of 5 MB.
//
// Zero value for IndexCacheSize means all the indices will be kept in
// memory and the cache is disabled.
//
// The default value of IndexCacheSize is 0 which means all indices are kept in
// memory.
func (opt Options) WithIndexCacheSize(size int64) Options {
opt.IndexCacheSize = size
return opt
}

// WithDetectConflicts returns a new Options value with DetectConflicts set to the given value.
//
// Detect conflicts options determines if the transactions would be checked for
Expand All @@ -661,38 +654,3 @@ func (opt Options) WithDetectConflicts(b bool) Options {
opt.DetectConflicts = b
return opt
}

// WithKeepBlockIndicesInCache returns a new Option value with KeepBlockOffsetInCache set to the
// given value.
//
// When this option is set badger will store the block offsets in a cache along with the blocks.
// The size of the cache is determined by the MaxCacheSize option.If the MaxCacheSize is set to
// zero, then MaxCacheSize is set to 100 mb. When indices are stored in the cache, the read
// performance might be affected but the cache limits the amount of memory used by the indices.
//
// The default value of KeepBlockOffsetInCache is false.
func (opt Options) WithKeepBlockIndicesInCache(val bool) Options {
opt.KeepBlockIndicesInCache = val

if val && opt.MaxCacheSize == 0 {
opt.MaxCacheSize = 100 << 20
}
return opt
}

// WithKeepBlocksInCache returns a new Option value with KeepBlocksInCache set to the
// given value.
//
// When this option is set badger will store the block in the cache. The size of the cache is
// determined by the MaxCacheSize option.If the MaxCacheSize is set to zero,
// then MaxCacheSize is set to 100 mb.
//
// The default value of KeepBlocksInCache is false.
func (opt Options) WithKeepBlocksInCache(val bool) Options {
opt.KeepBlocksInCache = val

if val && opt.MaxCacheSize == 0 {
opt.MaxCacheSize = 100 << 20
}
return opt
}
4 changes: 2 additions & 2 deletions stream_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,8 @@ func (w *sortedWriter) createTable(builder *table.Builder) error {
fileID := w.db.lc.reserveFileID()
opts := buildTableOptions(w.db.opt)
opts.DataKey = builder.DataKey()
opts.Cache = w.db.blockCache
opts.BfCache = w.db.bfCache
opts.BlockCache = w.db.blockCache
opts.IndexCache = w.db.indexCache
var tbl *table.Table
if w.db.opt.InMemory {
var err error
Expand Down
Loading

0 comments on commit 7d288ec

Please sign in to comment.