Skip to content

Commit 4516b44

Browse files
author
Ibrahim Jarif
committed
Revert "Add support for caching bloomfilters (#1204)"
This reverts commit 4676ca9. Reading bloom filters from the cache leads to severe preformance degradation. Result of read bench tool 1. With bloom filters in cache Average read speed : 97.37 KB/s Total bytes read in 1 minute : 3.7 MB 2. With bloom filters stored in memory (no cache) Average read speed : 41.3833 MB/s Total bytes read in 1 minute : 2.4 GB
1 parent bdb2b13 commit 4516b44

File tree

3 files changed

+20
-86
lines changed

3 files changed

+20
-86
lines changed

table/builder_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func TestTableIndex(t *testing.T) {
5757
keysCount := 10000
5858
for _, opt := range opts {
5959
builder := NewTableBuilder(opt)
60-
filename := fmt.Sprintf("%s%c%d.sst", os.TempDir(), os.PathSeparator, rand.Uint32())
60+
filename := fmt.Sprintf("%s%c%d.sst", os.TempDir(), os.PathSeparator, rand.Int63())
6161
f, err := y.OpenSyncedFile(filename, true)
6262
require.NoError(t, err)
6363

table/table.go

Lines changed: 13 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package table
1818

1919
import (
2020
"crypto/aes"
21-
"encoding/binary"
2221
"fmt"
2322
"io"
2423
"math"
@@ -82,7 +81,7 @@ type TableInterface interface {
8281
DoesNotHave(hash uint64) bool
8382
}
8483

85-
// Table represents a loaded table file with the info we have about it.
84+
// Table represents a loaded table file with the info we have about it
8685
type Table struct {
8786
sync.Mutex
8887

@@ -98,11 +97,10 @@ type Table struct {
9897
smallest, biggest []byte // Smallest and largest keys (with timestamps).
9998
id uint64 // file id, part of filename
10099

100+
bf *z.Bloom
101101
Checksum []byte
102102
// Stores the total size of key-values stored in this table (including the size on vlog).
103103
estimatedSize uint64
104-
indexStart int
105-
indexLen int
106104

107105
IsInmemory bool // Set to true if the table is on level 0 and opened in memory.
108106
opt *Options
@@ -148,13 +146,6 @@ func (t *Table) DecrRef() error {
148146
if err := os.Remove(filename); err != nil {
149147
return err
150148
}
151-
// Delete all blocks from the cache.
152-
for i := range t.blockIndex {
153-
t.opt.Cache.Del(t.blockCacheKey(i))
154-
}
155-
// Delete bloom filter from the cache.
156-
t.opt.Cache.Del(t.bfCacheKey())
157-
158149
}
159150
return nil
160151
}
@@ -345,12 +336,10 @@ func (t *Table) readIndex() error {
345336
// Read index size from the footer.
346337
readPos -= 4
347338
buf = t.readNoFail(readPos, 4)
348-
t.indexLen = int(y.BytesToU32(buf))
349-
339+
indexLen := int(y.BytesToU32(buf))
350340
// Read index.
351-
readPos -= t.indexLen
352-
t.indexStart = readPos
353-
data := t.readNoFail(readPos, t.indexLen)
341+
readPos -= indexLen
342+
data := t.readNoFail(readPos, indexLen)
354343

355344
if err := y.VerifyChecksum(data, expectedChk); err != nil {
356345
return y.Wrapf(err, "failed to verify checksum for table: %s", t.Filename())
@@ -369,18 +358,11 @@ func (t *Table) readIndex() error {
369358
y.Check(err)
370359

371360
t.estimatedSize = index.EstimatedSize
372-
t.blockIndex = index.Offsets
373-
374-
// Avoid the cost of unmarshalling the bloom filters if the cache is absent.
375-
if t.opt.Cache != nil {
376-
var bf *z.Bloom
377-
if bf, err = z.JSONUnmarshal(index.BloomFilter); err != nil {
378-
return y.Wrapf(err, "failed to unmarshal bloom filter for the table %d in Table.readIndex",
379-
t.id)
380-
}
381-
382-
t.opt.Cache.Set(t.bfCacheKey(), bf, int64(len(index.BloomFilter)))
361+
if t.bf, err = z.JSONUnmarshal(index.BloomFilter); err != nil {
362+
return y.Wrapf(err, "failed to unmarshal bloom filter for the table %d in Table.readIndex",
363+
t.id)
383364
}
365+
t.blockIndex = index.Offsets
384366
return nil
385367
}
386368

@@ -461,25 +443,10 @@ func (t *Table) block(idx int) (*block, error) {
461443
return blk, nil
462444
}
463445

464-
// bfCacheKey returns the cache key for bloom filter.
465-
func (t *Table) bfCacheKey() []byte {
466-
y.AssertTrue(t.id < math.MaxUint32)
467-
buf := make([]byte, 4)
468-
binary.BigEndian.PutUint32(buf, uint32(t.id))
469-
470-
// Without the "bf" prefix, we will have conflict with the blockCacheKey.
471-
return append([]byte("bf"), buf...)
472-
}
473-
474-
func (t *Table) blockCacheKey(idx int) []byte {
475-
y.AssertTrue(t.id < math.MaxUint32)
446+
func (t *Table) blockCacheKey(idx int) uint64 {
447+
y.AssertTrue(t.ID() < math.MaxUint32)
476448
y.AssertTrue(uint32(idx) < math.MaxUint32)
477-
478-
buf := make([]byte, 8)
479-
// Assume t.ID does not overflow uint32.
480-
binary.BigEndian.PutUint32(buf[:4], uint32(t.ID()))
481-
binary.BigEndian.PutUint32(buf[4:], uint32(idx))
482-
return buf
449+
return (t.ID() << 32) | uint64(idx)
483450
}
484451

485452
// EstimatedSize returns the total size of key-values stored in this table (including the
@@ -503,44 +470,7 @@ func (t *Table) ID() uint64 { return t.id }
503470

504471
// DoesNotHave returns true if (but not "only if") the table does not have the key hash.
505472
// It does a bloom filter lookup.
506-
func (t *Table) DoesNotHave(hash uint64) bool {
507-
var bf *z.Bloom
508-
509-
// Return fast if cache is absent.
510-
if t.opt.Cache == nil {
511-
bf, _ := t.readBloomFilter()
512-
return !bf.Has(hash)
513-
}
514-
515-
// Check if the bloomfilter exists in the cache.
516-
if b, ok := t.opt.Cache.Get(t.bfCacheKey()); b != nil && ok {
517-
bf = b.(*z.Bloom)
518-
return !bf.Has(hash)
519-
}
520-
521-
bf, sz := t.readBloomFilter()
522-
t.opt.Cache.Set(t.bfCacheKey(), bf, int64(sz))
523-
return !bf.Has(hash)
524-
}
525-
526-
// readBloomFilter reads the bloom filter from the SST and returns its length
527-
// along with the bloom filter.
528-
func (t *Table) readBloomFilter() (*z.Bloom, int) {
529-
// Read bloom filter from the SST.
530-
data := t.readNoFail(t.indexStart, t.indexLen)
531-
index := pb.TableIndex{}
532-
var err error
533-
// Decrypt the table index if it is encrypted.
534-
if t.shouldDecrypt() {
535-
data, err = t.decrypt(data)
536-
y.Check(err)
537-
}
538-
y.Check(proto.Unmarshal(data, &index))
539-
540-
bf, err := z.JSONUnmarshal(index.BloomFilter)
541-
y.Check(err)
542-
return bf, len(index.BloomFilter)
543-
}
473+
func (t *Table) DoesNotHave(hash uint64) bool { return !t.bf.Has(hash) }
544474

545475
// VerifyChecksum verifies checksum for all blocks of table. This function is called by
546476
// OpenTable() function. This function is also called inside levelsController.VerifyChecksum().

table/table_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,13 @@ func buildTable(t *testing.T, keyValues [][]string, opts Options) *os.File {
7777
defer b.Close()
7878
// TODO: Add test for file garbage collection here. No files should be left after the tests here.
7979

80-
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Uint32())
80+
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
8181
f, err := y.CreateSyncedFile(filename, true)
82-
require.NoError(t, err)
82+
if t != nil {
83+
require.NoError(t, err)
84+
} else {
85+
y.Check(err)
86+
}
8387

8488
sort.Slice(keyValues, func(i, j int) bool {
8589
return keyValues[i][0] < keyValues[j][0]

0 commit comments

Comments
 (0)