From dd1123f963e2a62f3e622b557e0d01b5c31455a9 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Wed, 8 Jan 2020 17:41:28 +0530 Subject: [PATCH] Fix checkOverlap in compaction (#1166) The overlap check in compaction would keep additional keys in case the levels under compaction had overlap amongst themselves. This commit fixes it. This commit also fixes the `numVersionsToKeep` check. Without this commit, we would end up keeping `2` versions of a key even when the number of versions to keep was set to `1`. See `level 0 to level 1 with lower overlap` test. Fixes https://github.com/dgraph-io/badger/issues/1053 The following test fails on master but works with this commit ```go func TestCompaction(t *testing.T) { t.Run("level 0 to level 1", func(t *testing.T) { dir, err := ioutil.TempDir("", "badger-test") require.NoError(t, err) defer removeDir(dir) // Disable compactions and keep single version of each key. opt := DefaultOptions(dir).WithNumCompactors(0).WithNumVersionsToKeep(1) db, err := OpenManaged(opt) require.NoError(t, err) l0 := []keyValVersion{{"foo", "bar", 3}, {"fooz", "baz", 1}} l01 := []keyValVersion{{"foo", "bar", 2}} l1 := []keyValVersion{{"foo", "bar", 1}} // Level 0 has table l0 and l01. createAndOpen(db, l0, 0) createAndOpen(db, l01, 0) // Level 1 has table l1. createAndOpen(db, l1, 1) // Set a high discard timestamp so that all the keys are below the discard timestamp. db.SetDiscardTs(10) getAllAndCheck(t, db, []keyValVersion{ {"foo", "bar", 3}, {"foo", "bar", 2}, {"foo", "bar", 1}, {"fooz", "baz", 1}, }) cdef := compactDef{ thisLevel: db.lc.levels[0], nextLevel: db.lc.levels[1], top: db.lc.levels[0].tables, bot: db.lc.levels[1].tables, } require.NoError(t, db.lc.runCompactDef(0, cdef)) // foo version 2 should be dropped after compaction. getAllAndCheck(t, db, []keyValVersion{{"foo", "bar", 3}, {"fooz", "baz", 1}}) }) } ``` --- badger/levels.go | 54 ++++-- badger/levels_test.go | 441 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 474 insertions(+), 21 deletions(-) create mode 100644 badger/levels_test.go diff --git a/badger/levels.go b/badger/levels.go index 0a4b92f26..39a68b4a4 100644 --- a/badger/levels.go +++ b/badger/levels.go @@ -430,28 +430,35 @@ func (s *levelsController) pickCompactLevels() (prios []compactionPriority) { return prios } -// compactBuildTables merge topTables and botTables to form a list of new tables. +// checkOverlap checks if the given tables overlap with any level from the given "lev" onwards. +func (s *levelsController) checkOverlap(tables []*table.Table, lev int) bool { + kr := getKeyRange(tables...) + for i, lh := range s.levels { + if i < lev { // Skip upper levels. + continue + } + lh.RLock() + left, right := lh.overlappingTables(levelHandlerRLocked{}, kr) + lh.RUnlock() + if right-left > 0 { + return true + } + } + return false +} + +// compactBuildTables merges topTables and botTables to form a list of new tables. func (s *levelsController) compactBuildTables( lev int, cd compactDef) ([]*table.Table, func() error, error) { topTables := cd.top botTables := cd.bot - var hasOverlap bool - { - kr := getKeyRange(cd.top...) - for i, lh := range s.levels { - if i <= lev { // Skip upper levels. - continue - } - lh.RLock() - left, right := lh.overlappingTables(levelHandlerRLocked{}, kr) - lh.RUnlock() - if right-left > 0 { - hasOverlap = true - break - } - } - } + // Check overlap of the top level with the levels which are not being + // compacted in this compaction. We don't need to check overlap of the bottom + // tables with other levels because if the top tables overlap with any of the lower + // levels, it implies bottom level also overlaps because top and bottom tables + // overlap with each other. + hasOverlap := s.checkOverlap(cd.top, cd.nextLevel.level+1) // Try to collect stats so that we can inform value log about GC. That would help us find which // value log file should be GCed. @@ -561,10 +568,15 @@ func (s *levelsController) compactBuildTables( // versions which are below the minReadTs, otherwise, we might end up discarding the // only valid version for a running transaction. numVersions++ - lastValidVersion := vs.Meta&bitDiscardEarlierVersions > 0 - if isDeletedOrExpired(vs.Meta, vs.ExpiresAt) || - numVersions > s.kv.opt.NumVersionsToKeep || - lastValidVersion { + + // Keep the current version and discard all the next versions if + // - The `discardEarlierVersions` bit is set OR + // - We've already processed `NumVersionsToKeep` number of versions + // (including the current item being processed) + lastValidVersion := vs.Meta&bitDiscardEarlierVersions > 0 || + numVersions == s.kv.opt.NumVersionsToKeep + + if isDeletedOrExpired(vs.Meta, vs.ExpiresAt) || lastValidVersion { // If this version of the key is deleted or expired, skip all the rest of the // versions. Ensure that we're only removing versions below readTs. skipKey = y.SafeCopy(skipKey, it.Key()) diff --git a/badger/levels_test.go b/badger/levels_test.go new file mode 100644 index 000000000..51e7baf72 --- /dev/null +++ b/badger/levels_test.go @@ -0,0 +1,441 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package badger + +import ( + "math" + "testing" + + "github.com/dgraph-io/badger/v2/options" + "github.com/dgraph-io/badger/v2/pb" + "github.com/dgraph-io/badger/v2/table" + "github.com/dgraph-io/badger/v2/y" + "github.com/stretchr/testify/require" +) + +// createAndOpen creates a table with the given data and adds it to the given level. +func createAndOpen(db *DB, td []keyValVersion, level int) { + opts := table.Options{ + BlockSize: db.opt.BlockSize, + BloomFalsePositive: db.opt.BloomFalsePositive, + LoadingMode: options.LoadToRAM, + ChkMode: options.NoVerification, + } + b := table.NewTableBuilder(opts) + + // Add all keys and versions to the table. + for _, item := range td { + key := y.KeyWithTs([]byte(item.key), uint64(item.version)) + val := y.ValueStruct{Value: []byte(item.val), Meta: item.meta} + b.Add(key, val, 0) + } + fd, err := y.CreateSyncedFile(table.NewFilename(db.lc.reserveFileID(), db.opt.Dir), true) + if err != nil { + panic(err) + } + + if _, err = fd.Write(b.Finish()); err != nil { + panic(err) + } + tab, err := table.OpenTable(fd, opts) + if err != nil { + panic(err) + } + if err := db.manifest.addChanges([]*pb.ManifestChange{ + newCreateChange(tab.ID(), level, 0, tab.CompressionType()), + }); err != nil { + panic(err) + } + // Add table to the given level. + db.lc.levels[level].tables = append(db.lc.levels[level].tables, tab) +} + +type keyValVersion struct { + key string + val string + version int + meta byte +} + +func TestCheckOverlap(t *testing.T) { + t.Run("overlap", func(t *testing.T) { + // This test consists of one table on level 0 and one on level 1. + // There is an overlap amongst the tables but there is no overlap + // with rest of the levels. + t.Run("same keys", func(t *testing.T) { + runBadgerTest(t, nil, func(t *testing.T, db *DB) { + l0 := []keyValVersion{{"foo", "bar", 3, 0}} + l1 := []keyValVersion{{"foo", "bar", 2, 0}} + createAndOpen(db, l0, 0) + createAndOpen(db, l1, 1) + + // Level 0 should overlap with level 0 tables. + require.True(t, db.lc.checkOverlap(db.lc.levels[0].tables, 0)) + // Level 1 should overlap with level 0 tables (they have the same keys). + require.True(t, db.lc.checkOverlap(db.lc.levels[0].tables, 1)) + // Level 2 and 3 should not overlap with level 0 tables. + require.False(t, db.lc.checkOverlap(db.lc.levels[0].tables, 2)) + require.False(t, db.lc.checkOverlap(db.lc.levels[1].tables, 2)) + require.False(t, db.lc.checkOverlap(db.lc.levels[0].tables, 3)) + require.False(t, db.lc.checkOverlap(db.lc.levels[1].tables, 3)) + + }) + }) + t.Run("overlapping keys", func(t *testing.T) { + runBadgerTest(t, nil, func(t *testing.T, db *DB) { + l0 := []keyValVersion{{"a", "x", 1, 0}, {"b", "x", 1, 0}, {"foo", "bar", 3, 0}} + l1 := []keyValVersion{{"foo", "bar", 2, 0}} + createAndOpen(db, l0, 0) + createAndOpen(db, l1, 1) + + // Level 0 should overlap with level 0 tables. + require.True(t, db.lc.checkOverlap(db.lc.levels[0].tables, 0)) + require.True(t, db.lc.checkOverlap(db.lc.levels[1].tables, 1)) + // Level 1 should overlap with level 0 tables, "foo" key is common. + require.True(t, db.lc.checkOverlap(db.lc.levels[0].tables, 1)) + // Level 2 and 3 should not overlap with level 0 tables. + require.False(t, db.lc.checkOverlap(db.lc.levels[0].tables, 2)) + require.False(t, db.lc.checkOverlap(db.lc.levels[0].tables, 3)) + }) + }) + }) + t.Run("non-overlapping", func(t *testing.T) { + runBadgerTest(t, nil, func(t *testing.T, db *DB) { + l0 := []keyValVersion{{"a", "x", 1, 0}, {"b", "x", 1, 0}, {"c", "bar", 3, 0}} + l1 := []keyValVersion{{"foo", "bar", 2, 0}} + createAndOpen(db, l0, 0) + createAndOpen(db, l1, 1) + + // Level 1 should not overlap with level 0 tables + require.False(t, db.lc.checkOverlap(db.lc.levels[0].tables, 1)) + // Level 2 and 3 should not overlap with level 0 tables. + require.False(t, db.lc.checkOverlap(db.lc.levels[0].tables, 2)) + require.False(t, db.lc.checkOverlap(db.lc.levels[0].tables, 3)) + }) + }) +} + +func getAllAndCheck(t *testing.T, db *DB, expected []keyValVersion) { + db.View(func(txn *Txn) error { + opt := DefaultIteratorOptions + opt.AllVersions = true + opt.InternalAccess = true + it := txn.NewIterator(opt) + defer it.Close() + i := 0 + for it.Rewind(); it.Valid(); it.Next() { + require.Less(t, i, len(expected), "DB has more number of key than expected") + item := it.Item() + v, err := item.ValueCopy(nil) + require.NoError(t, err) + // fmt.Printf("k: %s v: %d val: %s\n", item.key, item.Version(), v) + expect := expected[i] + require.Equal(t, expect.key, string(item.Key()), "expected key: %s actual key: %s", + expect.key, item.Key()) + require.Equal(t, expect.val, string(v), "key: %s expected value: %s actual %s", + item.key, expect.val, v) + require.Equal(t, expect.version, int(item.Version()), + "key: %s expected version: %d actual %d", item.key, expect.version, item.Version()) + require.Equal(t, expect.meta, item.meta, + "key: %s expected meta: %d meta %d", item.key, expect.meta, item.meta) + i++ + } + require.Equal(t, len(expected), i, "keys examined should be equal to keys expected") + return nil + }) + +} + +func TestCompaction(t *testing.T) { + // Disable compactions and keep single version of each key. + opt := DefaultOptions("").WithNumCompactors(0).WithNumVersionsToKeep(1) + opt.managedTxns = true + t.Run("level 0 to level 1", func(t *testing.T) { + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l0 := []keyValVersion{{"foo", "bar", 3, 0}, {"fooz", "baz", 1, 0}} + l01 := []keyValVersion{{"foo", "bar", 2, 0}} + l1 := []keyValVersion{{"foo", "bar", 1, 0}} + // Level 0 has table l0 and l01. + createAndOpen(db, l0, 0) + createAndOpen(db, l01, 0) + // Level 1 has table l1. + createAndOpen(db, l1, 1) + + // Set a high discard timestamp so that all the keys are below the discard timestamp. + db.SetDiscardTs(10) + + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 3, 0}, {"foo", "bar", 2, 0}, + {"foo", "bar", 1, 0}, {"fooz", "baz", 1, 0}, + }) + cdef := compactDef{ + thisLevel: db.lc.levels[0], + nextLevel: db.lc.levels[1], + top: db.lc.levels[0].tables, + bot: db.lc.levels[1].tables, + } + require.NoError(t, db.lc.runCompactDef(0, cdef)) + // foo version 2 should be dropped after compaction. + getAllAndCheck(t, db, []keyValVersion{{"foo", "bar", 3, 0}, {"fooz", "baz", 1, 0}}) + }) + }) + + t.Run("level 0 to level 1 with lower overlap", func(t *testing.T) { + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l0 := []keyValVersion{{"foo", "bar", 3, 0}, {"fooz", "baz", 1, 0}} + l01 := []keyValVersion{{"foo", "bar", 2, 0}} + l1 := []keyValVersion{{"foo", "bar", 1, 0}} + l2 := []keyValVersion{{"foo", "bar", 0, 0}} + // Level 0 has table l0 and l01. + createAndOpen(db, l0, 0) + createAndOpen(db, l01, 0) + // Level 1 has table l1. + createAndOpen(db, l1, 1) + // Level 2 has table l2. + createAndOpen(db, l2, 2) + + // Set a high discard timestamp so that all the keys are below the discard timestamp. + db.SetDiscardTs(10) + + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 3, 0}, {"foo", "bar", 2, 0}, {"foo", "bar", 1, 0}, + {"foo", "bar", 0, 0}, {"fooz", "baz", 1, 0}, + }) + cdef := compactDef{ + thisLevel: db.lc.levels[0], + nextLevel: db.lc.levels[1], + top: db.lc.levels[0].tables, + bot: db.lc.levels[1].tables, + } + require.NoError(t, db.lc.runCompactDef(0, cdef)) + // foo version 2 and version 1 should be dropped after compaction. + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 3, 0}, {"foo", "bar", 0, 0}, {"fooz", "baz", 1, 0}, + }) + }) + }) + + t.Run("level 1 to level 2", func(t *testing.T) { + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l1 := []keyValVersion{{"foo", "bar", 3, 0}, {"fooz", "baz", 1, 0}} + l2 := []keyValVersion{{"foo", "bar", 2, 0}} + createAndOpen(db, l1, 1) + createAndOpen(db, l2, 2) + + // Set a high discard timestamp so that all the keys are below the discard timestamp. + db.SetDiscardTs(10) + + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 3, 0}, {"foo", "bar", 2, 0}, {"fooz", "baz", 1, 0}, + }) + cdef := compactDef{ + thisLevel: db.lc.levels[1], + nextLevel: db.lc.levels[2], + top: db.lc.levels[1].tables, + bot: db.lc.levels[2].tables, + } + require.NoError(t, db.lc.runCompactDef(1, cdef)) + // foo version 2 should be dropped after compaction. + getAllAndCheck(t, db, []keyValVersion{{"foo", "bar", 3, 0}, {"fooz", "baz", 1, 0}}) + }) + }) +} + +func TestHeadKeyCleanup(t *testing.T) { + // Disable compactions and keep single version of each key. + opt := DefaultOptions("").WithNumCompactors(0).WithNumVersionsToKeep(1) + opt.managedTxns = true + + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l0 := []keyValVersion{ + {string(head), "foo", 5, 0}, {string(head), "bar", 4, 0}, {string(head), "baz", 3, 0}, + } + l1 := []keyValVersion{{string(head), "fooz", 2, 0}, {string(head), "foozbaz", 1, 0}} + // Level 0 has table l0 and l01. + createAndOpen(db, l0, 0) + // Level 1 has table l1. + createAndOpen(db, l1, 1) + + // Set a high discard timestamp so that all the keys are below the discard timestamp. + db.SetDiscardTs(10) + + getAllAndCheck(t, db, []keyValVersion{ + {string(head), "foo", 5, 0}, {string(head), "bar", 4, 0}, {string(head), "baz", 3, 0}, + {string(head), "fooz", 2, 0}, {string(head), "foozbaz", 1, 0}, + }) + cdef := compactDef{ + thisLevel: db.lc.levels[0], + nextLevel: db.lc.levels[1], + top: db.lc.levels[0].tables, + bot: db.lc.levels[1].tables, + } + require.NoError(t, db.lc.runCompactDef(0, cdef)) + // foo version 2 should be dropped after compaction. + getAllAndCheck(t, db, []keyValVersion{{string(head), "foo", 5, 0}}) + }) +} + +func TestDiscardTs(t *testing.T) { + // Disable compactions and keep single version of each key. + opt := DefaultOptions("").WithNumCompactors(0).WithNumVersionsToKeep(1) + opt.managedTxns = true + + t.Run("all keys above discardTs", func(t *testing.T) { + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l0 := []keyValVersion{{"foo", "bar", 4, 0}, {"fooz", "baz", 3, 0}} + l01 := []keyValVersion{{"foo", "bar", 3, 0}} + l1 := []keyValVersion{{"foo", "bar", 2, 0}} + // Level 0 has table l0 and l01. + createAndOpen(db, l0, 0) + createAndOpen(db, l01, 0) + // Level 1 has table l1. + createAndOpen(db, l1, 1) + + // Set dicardTs to 1. All the keys are above discardTs. + db.SetDiscardTs(1) + + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 4, 0}, {"foo", "bar", 3, 0}, + {"foo", "bar", 2, 0}, {"fooz", "baz", 3, 0}, + }) + cdef := compactDef{ + thisLevel: db.lc.levels[0], + nextLevel: db.lc.levels[1], + top: db.lc.levels[0].tables, + bot: db.lc.levels[1].tables, + } + require.NoError(t, db.lc.runCompactDef(0, cdef)) + // No keys should be dropped. + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 4, 0}, {"foo", "bar", 3, 0}, + {"foo", "bar", 2, 0}, {"fooz", "baz", 3, 0}, + }) + }) + }) + t.Run("some keys above discardTs", func(t *testing.T) { + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l0 := []keyValVersion{ + {"foo", "bar", 4, 0}, {"foo", "bar", 3, 0}, + {"foo", "bar", 2, 0}, {"fooz", "baz", 2, 0}, + } + l1 := []keyValVersion{{"foo", "bbb", 1, 0}} + createAndOpen(db, l0, 0) + createAndOpen(db, l1, 1) + + // Set dicardTs to 3. foo2 and foo1 should be dropped. + db.SetDiscardTs(3) + + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 4, 0}, {"foo", "bar", 3, 0}, {"foo", "bar", 2, 0}, + {"foo", "bbb", 1, 0}, {"fooz", "baz", 2, 0}, + }) + cdef := compactDef{ + thisLevel: db.lc.levels[0], + nextLevel: db.lc.levels[1], + top: db.lc.levels[0].tables, + bot: db.lc.levels[1].tables, + } + require.NoError(t, db.lc.runCompactDef(0, cdef)) + // foo1 and foo2 should be dropped. + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 4, 0}, {"foo", "bar", 3, 0}, {"fooz", "baz", 2, 0}, + }) + }) + }) + t.Run("all keys below discardTs", func(t *testing.T) { + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l0 := []keyValVersion{{"foo", "bar", 4, 0}, {"fooz", "baz", 3, 0}} + l01 := []keyValVersion{{"foo", "bar", 3, 0}} + l1 := []keyValVersion{{"foo", "bar", 2, 0}} + // Level 0 has table l0 and l01. + createAndOpen(db, l0, 0) + createAndOpen(db, l01, 0) + // Level 1 has table l1. + createAndOpen(db, l1, 1) + + // Set dicardTs to 10. All the keys are below discardTs. + db.SetDiscardTs(10) + + getAllAndCheck(t, db, []keyValVersion{ + {"foo", "bar", 4, 0}, {"foo", "bar", 3, 0}, + {"foo", "bar", 2, 0}, {"fooz", "baz", 3, 0}, + }) + cdef := compactDef{ + thisLevel: db.lc.levels[0], + nextLevel: db.lc.levels[1], + top: db.lc.levels[0].tables, + bot: db.lc.levels[1].tables, + } + require.NoError(t, db.lc.runCompactDef(0, cdef)) + // Only one version of every key should be left. + getAllAndCheck(t, db, []keyValVersion{{"foo", "bar", 4, 0}, {"fooz", "baz", 3, 0}}) + }) + }) +} + +// This is a test to ensure that the first entry with DiscardEarlierversion bit < DiscardTs +// is kept around (when numversionstokeep is infinite). +func TestDiscardFirstVersion(t *testing.T) { + opt := DefaultOptions("") + opt.NumCompactors = 0 + opt.NumVersionsToKeep = math.MaxUint32 + opt.managedTxns = true + + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l0 := []keyValVersion{{"foo", "bar", 1, 0}} + l01 := []keyValVersion{{"foo", "bar", 2, bitDiscardEarlierVersions}} + l02 := []keyValVersion{{"foo", "bar", 3, 0}} + l03 := []keyValVersion{{"foo", "bar", 4, 0}} + l04 := []keyValVersion{{"foo", "bar", 9, 0}} + l05 := []keyValVersion{{"foo", "bar", 10, bitDiscardEarlierVersions}} + + // Level 0 has all the tables. + createAndOpen(db, l0, 0) + createAndOpen(db, l01, 0) + createAndOpen(db, l02, 0) + createAndOpen(db, l03, 0) + createAndOpen(db, l04, 0) + createAndOpen(db, l05, 0) + + // Discard Time stamp is set to 7. + db.SetDiscardTs(7) + + // Compact L0 to L1 + cdef := compactDef{ + thisLevel: db.lc.levels[0], + nextLevel: db.lc.levels[1], + top: db.lc.levels[0].tables, + bot: db.lc.levels[1].tables, + } + require.NoError(t, db.lc.runCompactDef(0, cdef)) + + // - Version 10, 9 lie above version 7 so they should be there. + // - Version 4, 3, 2 lie below the discardTs but they don't have the + // "bitDiscardEarlierVersions" versions set so they should not be removed because number + // of versions to keep is set to infinite. + // - Version 1 is below DiscardTS and below the first "bitDiscardEarlierVersions" + // marker so IT WILL BE REMOVED. + ExpectedKeys := []keyValVersion{ + {"foo", "bar", 10, bitDiscardEarlierVersions}, + {"foo", "bar", 9, 0}, + {"foo", "bar", 4, 0}, + {"foo", "bar", 3, 0}, + {"foo", "bar", 2, bitDiscardEarlierVersions}} + + getAllAndCheck(t, db, ExpectedKeys) + }) +}