From 6c074551f8f5a5b470357236cd90e860d0deb517 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Fri, 6 Nov 2020 19:37:43 +0530 Subject: [PATCH] fix(compaction): Don't drop data when split overlaps with top tables (#1587) Badger compactions are split into sub compactions if there are more than 3 tables bottom tables are involved. The current implementation would drop keys if the top table would overlap with a bottom table. Consider the following scenario ``` Top table => [C (deleted)] bot table => [ A ] [ B ] [ C (valid) ] [ D ] ``` When this range is compacted, the result was `[ A B C(valid) D ] ` (only `C(deleted)` was dropped). The expected result was `[A B D]` . This PR fixes the above-mentioned issue. The test in this PR fails on master but works with this change. --- db_test.go | 11 ++++++++--- levels.go | 10 +++++++++- levels_test.go | 43 +++++++++++++++++++++++++++++++++++++++++++ test.sh | 1 + 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/db_test.go b/db_test.go index 13ab34949..53aeb3423 100644 --- a/db_test.go +++ b/db_test.go @@ -524,15 +524,20 @@ func BenchmarkDbGrowth(b *testing.B) { // Put a lot of data to move some data to disk. // WARNING: This test might take a while but it should pass! func TestGetMore(t *testing.T) { + if !*manual { + t.Skip("Skipping test meant to be run manually.") + return + } runBadgerTest(t, nil, func(t *testing.T, db *DB) { - data := func(i int) []byte { return []byte(fmt.Sprintf("%b", i)) } - // n := 500000 - n := 10000 + n := 500000 m := 45 // Increasing would cause ErrTxnTooBig for i := 0; i < n; i += m { + if (i % 10000) == 0 { + fmt.Printf("Inserting i=%d\n", i) + } txn := db.NewTransaction(true) for j := i; j < i+m && j < n; j++ { require.NoError(t, txn.SetEntry(NewEntry(data(j), data(j)))) diff --git a/levels.go b/levels.go index c71bed157..0b3950817 100644 --- a/levels.go +++ b/levels.go @@ -1001,7 +1001,15 @@ func (s *levelsController) addSplits(cd *compactDef) { return } if i%N == N-1 { - addRange(t.Biggest()) + // Right should always have ts=maxUint64 otherwise we'll lose keys + // in subcompaction. Consider the following. + // Top table is [A1...C3(deleted)] + // bot table is [B1....C2] + // This will generate splits like [A1 ... C2] . Notice that we + // dropped the C3 which is the last key of the top table. + // See TestCompaction/with_split test. + right := y.KeyWithTs(y.ParseKey(t.Biggest()), math.MaxUint64) + addRange(right) } } } diff --git a/levels_test.go b/levels_test.go index 6eaf7b5db..f928df28b 100644 --- a/levels_test.go +++ b/levels_test.go @@ -403,6 +403,49 @@ func TestCompaction(t *testing.T) { getAllAndCheck(t, db, []keyValVersion{{"fooo", "barr", 2, 0}}) }) }) + t.Run("with splits", func(t *testing.T) { + runBadgerTest(t, &opt, func(t *testing.T, db *DB) { + l1 := []keyValVersion{{"C", "bar", 3, bitDelete}} + l21 := []keyValVersion{{"A", "bar", 2, 0}} + l22 := []keyValVersion{{"B", "bar", 2, 0}} + l23 := []keyValVersion{{"C", "bar", 2, 0}} + l24 := []keyValVersion{{"D", "bar", 2, 0}} + l3 := []keyValVersion{{"fooz", "baz", 1, 0}} + createAndOpen(db, l1, 1) + createAndOpen(db, l21, 2) + createAndOpen(db, l22, 2) + createAndOpen(db, l23, 2) + createAndOpen(db, l24, 2) + createAndOpen(db, l3, 3) + + // Set a high discard timestamp so that all the keys are below the discard timestamp. + db.SetDiscardTs(10) + + getAllAndCheck(t, db, []keyValVersion{ + {"A", "bar", 2, 0}, + {"B", "bar", 2, 0}, + {"C", "bar", 3, bitDelete}, + {"C", "bar", 2, 0}, + {"D", "bar", 2, 0}, + {"fooz", "baz", 1, 0}, + }) + cdef := compactDef{ + thisLevel: db.lc.levels[1], + nextLevel: db.lc.levels[2], + top: db.lc.levels[1].tables, + bot: db.lc.levels[2].tables, + t: db.lc.levelTargets(), + } + cdef.t.baseLevel = 2 + require.NoError(t, db.lc.runCompactDef(-1, 1, cdef)) + getAllAndCheck(t, db, []keyValVersion{ + {"A", "bar", 2, 0}, + {"B", "bar", 2, 0}, + {"D", "bar", 2, 0}, + {"fooz", "baz", 1, 0}, + }) + }) + }) }) } diff --git a/test.sh b/test.sh index 53778c8df..9077b7e96 100755 --- a/test.sh +++ b/test.sh @@ -78,6 +78,7 @@ manual() { go test $tags -run='TestIterateParallel' --manual=true go test $tags -run='TestBigStream' --manual=true go test $tags -run='TestGoroutineLeak' --manual=true + go test $tags -run='TestGetMore' --manual=true echo "==> DONE manual tests" }