Commit

(release/v1.6) Confirm `badgerMove` entry required before rewrite (#1302) (#1502)

* Confirm `badgerMove` entry required before rewrite (#1302)

Value log `badgerMove` entries for deleted keys are always rewritten.
This leads to exponential DB size growth in heavy write/delete use
cases with value pointer entries. To prevent this, a check has been
added that confirms the value is still needed before a `badgerMove`
entry is rewritten.

(cherry picked from commit 3e1cdd9)

* remove KeepL0InMemory in db_test

Co-authored-by: Julian Hegler <[email protected]>
NamanJain8 and ou05020 authored Sep 9, 2020
1 parent afd9eb5 commit 654779f
Showing 2 changed files with 113 additions and 3 deletions.
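
As an editorial aside (not part of the commit), the failure mode described in the commit message can be sketched against Badger's public v1.x API. The import path, directory, loop count, and discard ratio below are illustrative assumptions, not values from this change; the point is the repeated write/delete/GC cycle under which badgerMove entries for deleted keys used to be rewritten forever:

package main

import (
	"encoding/binary"
	"log"

	badger "github.com/dgraph-io/badger"
)

func main() {
	// Placeholder directory; any empty dir works.
	db, err := badger.Open(badger.DefaultOptions("/tmp/badger-growth"))
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	value := make([]byte, 1024)
	for round := uint64(1); round <= 100; round++ {
		err := db.Update(func(txn *badger.Txn) error {
			// Write this round's key...
			key := make([]byte, 8)
			binary.BigEndian.PutUint64(key, round)
			if err := txn.Set(key, value); err != nil {
				return err
			}
			// ...and delete the previous round's key.
			prev := make([]byte, 8)
			binary.BigEndian.PutUint64(prev, round-1)
			return txn.Delete(prev)
		})
		if err != nil {
			log.Fatal(err)
		}
		// GC until there is nothing left to rewrite. Before this fix,
		// badgerMove entries for deleted keys survived every rewrite,
		// so the value log kept growing.
		for db.RunValueLogGC(0.001) == nil {
		}
	}
}

With the fix, the GC loop eventually reports ErrNoRewrite and disk usage stays bounded, which is what BenchmarkDbGrowth below asserts.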
100 changes: 100 additions & 0 deletions db_test.go
@@ -361,6 +361,106 @@ func TestForceCompactL0(t *testing.T) {
require.NoError(t, db.Close())
}

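// dirSize returns the total size, in MiB, of all files under path.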
func dirSize(path string) (int64, error) {
var size int64
err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
if !info.IsDir() {
size += info.Size()
}
return err
})
return (size >> 20), err
}

// BenchmarkDbGrowth ensures the DB does not grow with repeated adds and deletes.
//
// New keys are created with each for-loop iteration. During each
// iteration, the previous for-loop iteration's keys are deleted.
//
// To reproduce the continuous-growth problem caused by `badgerMove` keys,
// update `value.go` `discardEntry` line 1628 to return false.
//
// Also, with PR #1303, deleted keys are properly cleaned up, which
// further reduces disk size.
func BenchmarkDbGrowth(b *testing.B) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(b, err)
defer removeDir(dir)

start := 0
lastStart := 0
numKeys := 2000
valueSize := 1024
value := make([]byte, valueSize)

discardRatio := 0.001
maxWrites := 200
opts := getTestOptions(dir)
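// Small value log, table, and level sizes so that file rotation,
// compaction, and GC all kick in within a short benchmark run.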
opts.ValueLogFileSize = 64 << 15
opts.MaxTableSize = 4 << 15
opts.LevelOneSize = 16 << 15
opts.NumVersionsToKeep = 1
opts.NumLevelZeroTables = 1
opts.NumLevelZeroTablesStall = 2
db, err := Open(opts)
require.NoError(b, err)
for numWrites := 0; numWrites < maxWrites; numWrites++ {
txn := db.NewTransaction(true)
if start > 0 {
for i := lastStart; i < start; i++ {
key := make([]byte, 8)
binary.BigEndian.PutUint64(key[:], uint64(i))
err := txn.Delete(key)
if err == ErrTxnTooBig {
require.NoError(b, txn.Commit())
txn = db.NewTransaction(true)
} else {
require.NoError(b, err)
}
}
}

for i := start; i < numKeys+start; i++ {
key := make([]byte, 8)
binary.BigEndian.PutUint64(key[:], uint64(i))
err := txn.SetEntry(NewEntry(key, value))
if err == ErrTxnTooBig {
require.NoError(b, txn.Commit())
txn = db.NewTransaction(true)
} else {
require.NoError(b, err)
}
}
require.NoError(b, txn.Commit())
require.NoError(b, db.Flatten(1))
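// Run value log GC repeatedly until it reports nothing left to rewrite.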
for {
err = db.RunValueLogGC(discardRatio)
if err == ErrNoRewrite {
break
} else {
require.NoError(b, err)
}
}
size, err := dirSize(dir)
require.NoError(b, err)
fmt.Printf("Badger DB Size = %dMB\n", size)
lastStart = start
start += numKeys
}

db.Close()
size, err := dirSize(dir)
require.NoError(b, err)
require.LessOrEqual(b, size, int64(16))
fmt.Printf("Badger DB Size = %dMB\n", size)
}
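
(Editorial note: the benchmark can be run on its own with `go test -bench=BenchmarkDbGrowth -run='^$'`.)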

// Put a lot of data to move some data to disk.
// WARNING: This test might take a while but it should pass!
func TestGetMore(t *testing.T) {
16 changes: 13 additions & 3 deletions value.go
@@ -365,7 +365,7 @@ func (vlog *valueLog) rewrite(f *logFile, tr trace.Trace) error {
if err != nil {
return err
}
-if discardEntry(e, vs) {
+if discardEntry(e, vs, vlog.db) {
return nil
}

@@ -1324,7 +1324,7 @@ func (vlog *valueLog) pickLog(head valuePointer, tr trace.Trace) (files []*logFi
return files
}

-func discardEntry(e Entry, vs y.ValueStruct) bool {
+func discardEntry(e Entry, vs y.ValueStruct, db *DB) bool {
if vs.Version != y.ParseTs(e.Key) {
// Version not found. Discard.
return true
@@ -1340,6 +1340,16 @@ func discardEntry(e Entry, vs y.ValueStruct) bool {
// Just a txn finish entry. Discard.
return true
}
if bytes.HasPrefix(e.Key, badgerMove) {
// Verify that the actual key entry, without the badgerMove prefix, has not been deleted.
// If this is not done the badgerMove entry will be kept forever moving from
// vlog to vlog during rewrites.
avs, err := db.get(e.Key[len(badgerMove):])
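// If the lookup fails, err on the side of keeping the entry.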
if err != nil {
return false
}
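// A version of zero means the original key no longer exists,
// so this badgerMove entry can finally be discarded.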
return avs.Version == 0
}
return false
}

@@ -1412,7 +1422,7 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
if err != nil {
return err
}
-if discardEntry(e, vs) {
+if discardEntry(e, vs, vlog.db) {
r.discard += esz
return nil
}
