Skip to content

Commit 3747be5

Browse files
author
Ibrahim Jarif
authored
Improve write stalling on level 0 and 1
We don't need to stall writes if Level 1 does not have enough space. Level 1 is stored on disk, so it is okay for it to hold more tables (a larger total size) than the `max level 1 size`. These tables will eventually be compacted to lower levels. This commit changes the following: - We no longer stall writes if L1 doesn't have enough space. - We stall writes on level 0 only if `KeepL0InMemory` is true. - Upper levels (L0, L1, etc.) get priority in compaction (previously, the level with the higher priority score would get preference).
1 parent 5870b7b commit 3747be5

File tree

3 files changed

+151
-12
lines changed

3 files changed

+151
-12
lines changed

level_handler.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,9 @@ func (s *levelHandler) tryAddLevel0Table(t *table.Table) bool {
188188
// Need lock as we may be deleting the first table during a level 0 compaction.
189189
s.Lock()
190190
defer s.Unlock()
191-
if len(s.tables) >= s.db.opt.NumLevelZeroTablesStall {
191+
// Return false only if L0 is in memory and the number of tables has reached
192+
// NumLevelZeroTablesStall. For an on-disk L0, we should just add the tables to the level.
193+
if s.db.opt.KeepL0InMemory && len(s.tables) >= s.db.opt.NumLevelZeroTablesStall {
192194
return false
193195
}
194196

levels.go

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -424,9 +424,10 @@ func (s *levelsController) pickCompactLevels() (prios []compactionPriority) {
424424
prios = append(prios, pri)
425425
}
426426
}
427-
sort.Slice(prios, func(i, j int) bool {
428-
return prios[i].score > prios[j].score
429-
})
427+
// We used to sort compaction priorities based on the score. But, we
428+
// decided to compact based on the level, not the priority. So, upper
429+
// levels (level 0, level 1, etc) always get compacted first, before the
430+
// lower levels -- this allows us to avoid stalls.
430431
return prios
431432
}
432433

@@ -937,15 +938,13 @@ func (s *levelsController) addLevel0Table(t *table.Table) error {
937938
s.cstatus.RUnlock()
938939
timeStart = time.Now()
939940
}
940-
// Before we unstall, we need to make sure that level 0 and 1 are healthy. Otherwise, we
941-
// will very quickly fill up level 0 again and if the compaction strategy favors level 0,
942-
// then level 1 is going to super full.
941+
// Before we unstall, we need to make sure that level 0 is healthy. Otherwise, we
942+
// will very quickly fill up level 0 again.
943943
for i := 0; ; i++ {
944-
// Passing 0 for delSize to compactable means we're treating incomplete compactions as
945-
// not having finished -- we wait for them to finish. Also, it's crucial this behavior
946-
// replicates pickCompactLevels' behavior in computing compactability in order to
947-
// guarantee progress.
948-
if !s.isLevel0Compactable() && !s.levels[1].isCompactable(0) {
944+
// It's crucial that this behavior replicates pickCompactLevels' behavior in
945+
// computing compactability in order to guarantee progress.
946+
// Break the loop once L0 has enough space to accommodate new tables.
947+
if !s.isLevel0Compactable() {
949948
break
950949
}
951950
time.Sleep(10 * time.Millisecond)

levels_test.go

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package badger
1919
import (
2020
"math"
2121
"testing"
22+
"time"
2223

2324
"github.com/dgraph-io/badger/v2/options"
2425
"github.com/dgraph-io/badger/v2/pb"
@@ -439,3 +440,140 @@ func TestDiscardFirstVersion(t *testing.T) {
439440
getAllAndCheck(t, db, ExpectedKeys)
440441
})
441442
}
443+
444+
// This test ensures we don't stall when L1's size is greater than opt.LevelOneSize.
445+
// We should stall only when the number of L0 tables reaches opt.NumLevelZeroTablesStall.
446+
func TestL1Stall(t *testing.T) {
447+
opt := DefaultOptions("")
448+
// Disable all compactions.
449+
opt.NumCompactors = 0
450+
// Number of level zero tables.
451+
opt.NumLevelZeroTables = 3
452+
// Addition of new tables will stall if there are 4 or more L0 tables.
453+
opt.NumLevelZeroTablesStall = 4
454+
// Level 1 size is 10 bytes.
455+
opt.LevelOneSize = 10
456+
457+
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
458+
// Level 0 has 4 tables.
459+
db.lc.levels[0].Lock()
460+
db.lc.levels[0].tables = []*table.Table{createEmptyTable(db), createEmptyTable(db),
461+
createEmptyTable(db), createEmptyTable(db)}
462+
db.lc.levels[0].Unlock()
463+
464+
timeout := time.After(5 * time.Second)
465+
done := make(chan bool)
466+
467+
// This is important. Set the level 1 size to more than opt.LevelOneSize (we've set it to 10).
468+
db.lc.levels[1].totalSize = 100
469+
go func() {
470+
tab := createEmptyTable(db)
471+
db.lc.addLevel0Table(tab)
472+
tab.DecrRef()
473+
done <- true
474+
}()
475+
time.Sleep(time.Second)
476+
477+
db.lc.levels[0].Lock()
478+
// Drop two tables from Level 0 so that addLevel0Table can make progress. Earlier table
479+
// count was 4 which is equal to L0 stall count.
480+
toDrop := db.lc.levels[0].tables[:2]
481+
decrRefs(toDrop)
482+
db.lc.levels[0].tables = db.lc.levels[0].tables[2:]
483+
db.lc.levels[0].Unlock()
484+
485+
select {
486+
case <-timeout:
487+
t.Fatal("Test didn't finish in time")
488+
case <-done:
489+
}
490+
})
491+
}
492+
493+
func createEmptyTable(db *DB) *table.Table {
494+
opts := table.Options{
495+
BloomFalsePositive: db.opt.BloomFalsePositive,
496+
LoadingMode: options.LoadToRAM,
497+
ChkMode: options.NoVerification,
498+
}
499+
b := table.NewTableBuilder(opts)
500+
// Add one key so that we can open this table.
501+
b.Add(y.KeyWithTs([]byte("foo"), 1), y.ValueStruct{}, 0)
502+
fd, err := y.CreateSyncedFile(table.NewFilename(db.lc.reserveFileID(), db.opt.Dir), true)
503+
if err != nil {
504+
panic(err)
505+
}
506+
507+
if _, err := fd.Write(b.Finish()); err != nil {
508+
panic(err)
509+
}
510+
tab, err := table.OpenTable(fd, table.Options{})
511+
if err != nil {
512+
panic(err)
513+
}
514+
// Add dummy entry to manifest file so that it doesn't complain during compaction.
515+
if err := db.manifest.addChanges([]*pb.ManifestChange{
516+
newCreateChange(tab.ID(), 0, 0, tab.CompressionType()),
517+
}); err != nil {
518+
panic(err)
519+
}
520+
521+
return tab
522+
}
523+
524+
func TestL0Stall(t *testing.T) {
525+
test := func(t *testing.T, opt *Options) {
526+
runBadgerTest(t, opt, func(t *testing.T, db *DB) {
527+
db.lc.levels[0].Lock()
528+
// Add NumLevelZeroTablesStall+1 tables to level 0. This fills up level
529+
// zero, and all new additions are expected to stall if L0 is in memory.
530+
for i := 0; i < opt.NumLevelZeroTablesStall+1; i++ {
531+
db.lc.levels[0].tables = append(db.lc.levels[0].tables, createEmptyTable(db))
532+
}
533+
db.lc.levels[0].Unlock()
534+
535+
timeout := time.After(5 * time.Second)
536+
done := make(chan bool)
537+
538+
go func() {
539+
tab := createEmptyTable(db)
540+
db.lc.addLevel0Table(tab)
541+
tab.DecrRef()
542+
done <- true
543+
}()
544+
// Let it stall for a second.
545+
time.Sleep(time.Second)
546+
547+
select {
548+
case <-timeout:
549+
if opt.KeepL0InMemory {
550+
t.Log("Timeout triggered")
551+
// Mark this test as successful since L0 is in memory and the
552+
// addition of new table to L0 is supposed to stall.
553+
} else {
554+
t.Fatal("Test didn't finish in time")
555+
}
556+
case <-done:
557+
// The test completed before the 5-second timeout. Mark it as successful.
558+
}
559+
})
560+
}
561+
562+
opt := DefaultOptions("")
563+
opt.EventLogging = false
564+
// Disable all compactions.
565+
opt.NumCompactors = 0
566+
// Number of level zero tables.
567+
opt.NumLevelZeroTables = 3
568+
// Addition of new tables will stall if there are 4 or more L0 tables.
569+
opt.NumLevelZeroTablesStall = 4
570+
571+
t.Run("with KeepL0InMemory", func(t *testing.T) {
572+
opt.KeepL0InMemory = true
573+
test(t, &opt)
574+
})
575+
t.Run("with L0 on disk", func(t *testing.T) {
576+
opt.KeepL0InMemory = false
577+
test(t, &opt)
578+
})
579+
}

0 commit comments

Comments
 (0)