Skip to content

Commit cf76b23

Browse files
committed
combine memtables before flushing to L0
Taken from PR #1696, commit b21f591
1 parent c65a8ac commit cf76b23

File tree

1 file changed

+64
-53
lines changed

1 file changed

+64
-53
lines changed

db.go

+64-53
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ type DB struct {
109109
lc *levelsController
110110
vlog valueLog
111111
writeCh chan *request
112-
flushChan chan flushTask // For flushing memtables.
112+
flushChan chan *memTable // For flushing memtables.
113113
closeOnce sync.Once // For closing DB only once.
114114

115115
blockWrites int32
@@ -240,7 +240,7 @@ func Open(opt Options) (*DB, error) {
240240

241241
db := &DB{
242242
imm: make([]*memTable, 0, opt.NumMemtables),
243-
flushChan: make(chan flushTask, opt.NumMemtables),
243+
flushChan: make(chan *memTable, opt.NumMemtables),
244244
writeCh: make(chan *request, kvWriteChCapacity),
245245
opt: opt,
246246
manifest: manifestFile,
@@ -355,7 +355,7 @@ func Open(opt Options) (*DB, error) {
355355
}()
356356
// Flush them to disk asap.
357357
for _, mt := range db.imm {
358-
db.flushChan <- flushTask{mt: mt}
358+
db.flushChan <- mt
359359
}
360360
}
361361
// We do increment nextTxnTs below. So, no need to do it here.
@@ -568,12 +568,12 @@ func (db *DB) close() (err error) {
568568
} else {
569569
db.opt.Debugf("Flushing memtable")
570570
for {
571-
pushedFlushTask := func() bool {
571+
pushedMemTable := func() bool {
572572
db.lock.Lock()
573573
defer db.lock.Unlock()
574574
y.AssertTrue(db.mt != nil)
575575
select {
576-
case db.flushChan <- flushTask{mt: db.mt}:
576+
case db.flushChan <- db.mt:
577577
db.imm = append(db.imm, db.mt) // Flusher will attempt to remove this from s.imm.
578578
db.mt = nil // Will segfault if we try writing!
579579
db.opt.Debugf("pushed to flush chan\n")
@@ -586,7 +586,7 @@ func (db *DB) close() (err error) {
586586
}
587587
return false
588588
}()
589-
if pushedFlushTask {
589+
if pushedMemTable {
590590
break
591591
}
592592
time.Sleep(10 * time.Millisecond)
@@ -826,6 +826,7 @@ func (db *DB) writeRequests(reqs []*request) error {
826826
}
827827
count += len(b.Entries)
828828
var i uint64
829+
var err error
829830
for err = db.ensureRoomForWrite(); err == errNoRoom; err = db.ensureRoomForWrite() {
830831
i++
831832
if i%100 == 0 {
@@ -987,7 +988,7 @@ func (db *DB) ensureRoomForWrite() error {
987988
}
988989

989990
select {
990-
case db.flushChan <- flushTask{mt: db.mt}:
991+
case db.flushChan <- db.mt:
991992
db.opt.Debugf("Flushing memtable, mt.size=%d size of flushChan: %d\n",
992993
db.mt.sl.MemSize(), len(db.flushChan))
993994
// We manage to push this task. Let's modify imm.
@@ -1009,12 +1010,12 @@ func arenaSize(opt Options) int64 {
10091010
}
10101011

10111012
// buildL0Table builds a new table from the memtable.
1012-
func buildL0Table(ft flushTask, bopts table.Options) *table.Builder {
1013-
iter := ft.mt.sl.NewIterator()
1013+
func buildL0Table(iter y.Iterator, dropPrefixes [][]byte, bopts table.Options) *table.Builder {
10141014
defer iter.Close()
1015+
10151016
b := table.NewTableBuilder(bopts)
1016-
for iter.SeekToFirst(); iter.Valid(); iter.Next() {
1017-
if len(ft.dropPrefixes) > 0 && hasAnyPrefixes(iter.Key(), ft.dropPrefixes) {
1017+
for iter.Rewind(); iter.Valid(); iter.Next() {
1018+
if len(dropPrefixes) > 0 && hasAnyPrefixes(iter.Key(), dropPrefixes) {
10181019
continue
10191020
}
10201021
vs := iter.Value()
@@ -1024,23 +1025,14 @@ func buildL0Table(ft flushTask, bopts table.Options) *table.Builder {
10241025
}
10251026
b.Add(iter.Key(), iter.Value(), vp.Len)
10261027
}
1027-
return b
1028-
}
10291028

1030-
type flushTask struct {
1031-
mt *memTable
1032-
dropPrefixes [][]byte
1029+
return b
10331030
}
10341031

1035-
// handleFlushTask must be run serially.
1036-
func (db *DB) handleFlushTask(ft flushTask) error {
1037-
// There can be a scenario, when empty memtable is flushed.
1038-
if ft.mt.sl.Empty() {
1039-
return nil
1040-
}
1041-
1032+
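// handleMemTableFlush must be run serially.
// NOTE(review): dropPrefixes is accepted but nil is passed to buildL0Table below — confirm this is intended.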
1033+
func (db *DB) handleMemTableFlush(itr y.Iterator, dropPrefixes [][]byte) error {
10421034
bopts := buildTableOptions(db)
1043-
builder := buildL0Table(ft, bopts)
1035+
builder := buildL0Table(itr, nil, bopts)
10441036
defer builder.Close()
10451037

10461038
// buildL0Table can return nil if none of the items in the skiplist are
@@ -1069,39 +1061,62 @@ func (db *DB) handleFlushTask(ft flushTask) error {
10691061
return err
10701062
}
10711063

1072-
// flushMemtable must keep running until we send it an empty flushTask. If there
1073-
// are errors during handling the flush task, we'll retry indefinitely.
1064+
// flushMemtable must keep running until db.flushChan is closed and drained. If there
1065+
// are errors during handling the memtable flush, we'll retry indefinitely.
10741066
func (db *DB) flushMemtable(lc *z.Closer) error {
10751067
defer lc.Done()
10761068

1077-
for ft := range db.flushChan {
1078-
if ft.mt == nil {
1079-
// We close db.flushChan now, instead of sending a nil ft.mt.
1080-
continue
1081-
}
1082-
for {
1083-
err := db.handleFlushTask(ft)
1084-
if err == nil {
1069+
var sz int64
1070+
var itrs []y.Iterator
1071+
var mts []*memTable
1072+
for { //nolint:gosimple
1073+
select {
1074+
case mt, ok := <-db.flushChan:
1075+
if mt != nil {
1076+
itrs = append(itrs, mt.sl.NewUniIterator(false))
1077+
mts = append(mts, mt)
1078+
sz += mt.sl.MemSize()
1079+
if sz < db.opt.MemTableSize {
1080+
continue
1081+
}
1082+
}
1083+
1084+
if !ok && len(mts) == 0 {
1085+
return nil
1086+
}
1087+
if len(mts) == 0 {
1088+
continue
1089+
}
1090+
1091+
mitr := table.NewMergeIterator(itrs, false)
1092+
for {
1093+
if err := db.handleMemTableFlush(mitr, nil); err != nil {
1094+
// Encountered error. Retry indefinitely.
1095+
db.opt.Errorf("error flushing memtable to disk: %v, retrying", err)
1096+
time.Sleep(time.Second)
1097+
continue
1098+
}
1099+
10851100
// Update db.imm. Need a lock.
10861101
db.lock.Lock()
1087-
// This is a single-threaded operation. ft.mt corresponds to the head of
1088-
// db.imm list. Once we flush it, we advance db.imm. The next ft.mt
1102+
// This is a single-threaded operation. mt corresponds to the head of
1103+
// db.imm list. Once we flush it, we advance db.imm. The next mt
10891104
// which would arrive here would match db.imm[0], because we acquire a
10901105
// lock over DB when pushing to flushChan.
10911106
// TODO: This logic is dirty AF. Any change and this could easily break.
1092-
y.AssertTrue(ft.mt == db.imm[0])
1093-
db.imm = db.imm[1:]
1094-
ft.mt.DecrRef() // Return memory.
1107+
for _, mt := range mts {
1108+
y.AssertTrue(mt == db.imm[0])
1109+
db.imm = db.imm[1:]
1110+
mt.DecrRef() // Return memory.
1111+
}
10951112
db.lock.Unlock()
1096-
10971113
break
10981114
}
1099-
// Encountered error. Retry indefinitely.
1100-
db.opt.Errorf("Failure while flushing memtable to disk: %v. Retrying...\n", err)
1101-
time.Sleep(time.Second)
1115+
1116+
// Reset everything.
1117+
itrs, mts, sz = itrs[:0], mts[:0], 0
11021118
}
11031119
}
1104-
return nil
11051120
}
11061121

11071122
func exists(path string) (bool, error) {
@@ -1521,7 +1536,7 @@ func (db *DB) startCompactions() {
15211536
func (db *DB) startMemoryFlush() {
15221537
// Start memory flusher.
15231538
if db.closers.memtable != nil {
1524-
db.flushChan = make(chan flushTask, db.opt.NumMemtables)
1539+
db.flushChan = make(chan *memTable, db.opt.NumMemtables)
15251540
db.closers.memtable = z.NewCloser(1)
15261541
go func() {
15271542
_ = db.flushMemtable(db.closers.memtable)
@@ -1627,7 +1642,7 @@ func (db *DB) prepareToDrop() (func(), error) {
16271642
panic("Attempting to drop data in read-only mode.")
16281643
}
16291644
// In order to prepare for drop, we need to block the incoming writes and
1630-
// write it to db. Then, flush all the pending flushtask. So that, we
1645+
// write them to db. Then, flush all the pending memtables, so that we
16311646
// don't miss any entries.
16321647
if err := db.blockWrite(); err != nil {
16331648
return nil, err
@@ -1676,7 +1691,7 @@ func (db *DB) dropAll() (func(), error) {
16761691
if err != nil {
16771692
return f, err
16781693
}
1679-
// prepareToDrop will stop all the incomming write and flushes any pending flush tasks.
1694+
// prepareToDrop will stop all the incoming writes and flush any pending memtables.
16801695
// Before we drop, we'll stop the compactions because all the data is going to
16811696
// be deleted.
16821697
db.stopCompactions()
@@ -1758,13 +1773,9 @@ func (db *DB) DropPrefix(prefixes ...[]byte) error {
17581773
memtable.DecrRef()
17591774
continue
17601775
}
1761-
task := flushTask{
1762-
mt: memtable,
1763-
// Ensure that the head of value log gets persisted to disk.
1764-
dropPrefixes: filtered,
1765-
}
1776+
itr := memtable.sl.NewUniIterator(false)
17661777
db.opt.Debugf("Flushing memtable")
1767-
if err := db.handleFlushTask(task); err != nil {
1778+
if err := db.handleMemTableFlush(itr, filtered); err != nil {
17681779
db.opt.Errorf("While trying to flush memtable: %v", err)
17691780
return err
17701781
}

0 commit comments

Comments
 (0)