Skip to content

Commit

Permalink
Revert "Add support for caching bloomfilters (hypermodeinc#1204)"
Browse files Browse the repository at this point in the history
This reverts commit 4676ca9.
  • Loading branch information
adwinsky committed Mar 11, 2020
1 parent 617ed7c commit 2d7c7b4
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 86 deletions.
2 changes: 1 addition & 1 deletion table/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestTableIndex(t *testing.T) {
keysCount := 10000
for _, opt := range opts {
builder := NewTableBuilder(opt)
filename := fmt.Sprintf("%s%c%d.sst", os.TempDir(), os.PathSeparator, rand.Uint32())
filename := fmt.Sprintf("%s%c%d.sst", os.TempDir(), os.PathSeparator, rand.Int63())
f, err := y.OpenSyncedFile(filename, true)
require.NoError(t, err)

Expand Down
96 changes: 13 additions & 83 deletions table/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package table

import (
"crypto/aes"
"encoding/binary"
"fmt"
"io"
"math"
Expand Down Expand Up @@ -82,7 +81,7 @@ type TableInterface interface {
DoesNotHave(hash uint64) bool
}

// Table represents a loaded table file with the info we have about it.
// Table represents a loaded table file with the info we have about it
type Table struct {
sync.Mutex

Expand All @@ -98,11 +97,10 @@ type Table struct {
smallest, biggest []byte // Smallest and largest keys (with timestamps).
id uint64 // file id, part of filename

bf *z.Bloom
Checksum []byte
// Stores the total size of key-values stored in this table (including the size on vlog).
estimatedSize uint64
indexStart int
indexLen int

IsInmemory bool // Set to true if the table is on level 0 and opened in memory.
opt *Options
Expand Down Expand Up @@ -148,13 +146,6 @@ func (t *Table) DecrRef() error {
if err := os.Remove(filename); err != nil {
return err
}
// Delete all blocks from the cache.
for i := range t.blockIndex {
t.opt.Cache.Del(t.blockCacheKey(i))
}
// Delete bloom filter from the cache.
t.opt.Cache.Del(t.bfCacheKey())

}
return nil
}
Expand Down Expand Up @@ -345,12 +336,10 @@ func (t *Table) readIndex() error {
// Read index size from the footer.
readPos -= 4
buf = t.readNoFail(readPos, 4)
t.indexLen = int(y.BytesToU32(buf))

indexLen := int(y.BytesToU32(buf))
// Read index.
readPos -= t.indexLen
t.indexStart = readPos
data := t.readNoFail(readPos, t.indexLen)
readPos -= indexLen
data := t.readNoFail(readPos, indexLen)

if err := y.VerifyChecksum(data, expectedChk); err != nil {
return y.Wrapf(err, "failed to verify checksum for table: %s", t.Filename())
Expand All @@ -369,18 +358,11 @@ func (t *Table) readIndex() error {
y.Check(err)

t.estimatedSize = index.EstimatedSize
t.blockIndex = index.Offsets

// Avoid the cost of unmarshalling the bloom filters if the cache is absent.
if t.opt.Cache != nil {
var bf *z.Bloom
if bf, err = z.JSONUnmarshal(index.BloomFilter); err != nil {
return y.Wrapf(err, "failed to unmarshal bloom filter for the table %d in Table.readIndex",
t.id)
}

t.opt.Cache.Set(t.bfCacheKey(), bf, int64(len(index.BloomFilter)))
if t.bf, err = z.JSONUnmarshal(index.BloomFilter); err != nil {
return y.Wrapf(err, "failed to unmarshal bloom filter for the table %d in Table.readIndex",
t.id)
}
t.blockIndex = index.Offsets
return nil
}

Expand Down Expand Up @@ -461,25 +443,10 @@ func (t *Table) block(idx int) (*block, error) {
return blk, nil
}

// bfCacheKey returns the cache key for bloom filter.
func (t *Table) bfCacheKey() []byte {
y.AssertTrue(t.id < math.MaxUint32)
buf := make([]byte, 4)
binary.BigEndian.PutUint32(buf, uint32(t.id))

// Without the "bf" prefix, we will have conflict with the blockCacheKey.
return append([]byte("bf"), buf...)
}

func (t *Table) blockCacheKey(idx int) []byte {
y.AssertTrue(t.id < math.MaxUint32)
func (t *Table) blockCacheKey(idx int) uint64 {
y.AssertTrue(t.ID() < math.MaxUint32)
y.AssertTrue(uint32(idx) < math.MaxUint32)

buf := make([]byte, 8)
// Assume t.ID does not overflow uint32.
binary.BigEndian.PutUint32(buf[:4], uint32(t.ID()))
binary.BigEndian.PutUint32(buf[4:], uint32(idx))
return buf
return (t.ID() << 32) | uint64(idx)
}

// EstimatedSize returns the total size of key-values stored in this table (including the
Expand All @@ -503,44 +470,7 @@ func (t *Table) ID() uint64 { return t.id }

// DoesNotHave returns true if (but not "only if") the table does not have the key hash.
// It does a bloom filter lookup.
func (t *Table) DoesNotHave(hash uint64) bool {
var bf *z.Bloom

// Return fast if cache is absent.
if t.opt.Cache == nil {
bf, _ := t.readBloomFilter()
return !bf.Has(hash)
}

// Check if the bloomfilter exists in the cache.
if b, ok := t.opt.Cache.Get(t.bfCacheKey()); b != nil && ok {
bf = b.(*z.Bloom)
return !bf.Has(hash)
}

bf, sz := t.readBloomFilter()
t.opt.Cache.Set(t.bfCacheKey(), bf, int64(sz))
return !bf.Has(hash)
}

// readBloomFilter reads the bloom filter from the SST and returns its length
// along with the bloom filter.
func (t *Table) readBloomFilter() (*z.Bloom, int) {
// Read bloom filter from the SST.
data := t.readNoFail(t.indexStart, t.indexLen)
index := pb.TableIndex{}
var err error
// Decrypt the table index if it is encrypted.
if t.shouldDecrypt() {
data, err = t.decrypt(data)
y.Check(err)
}
y.Check(proto.Unmarshal(data, &index))

bf, err := z.JSONUnmarshal(index.BloomFilter)
y.Check(err)
return bf, len(index.BloomFilter)
}
func (t *Table) DoesNotHave(hash uint64) bool { return !t.bf.Has(hash) }

// VerifyChecksum verifies checksum for all blocks of table. This function is called by
// OpenTable() function. This function is also called inside levelsController.VerifyChecksum().
Expand Down
8 changes: 6 additions & 2 deletions table/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,13 @@ func buildTable(t *testing.T, keyValues [][]string, opts Options) *os.File {
defer b.Close()
// TODO: Add test for file garbage collection here. No files should be left after the tests here.

filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Uint32())
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
f, err := y.CreateSyncedFile(filename, true)
require.NoError(t, err)
if t != nil {
require.NoError(t, err)
} else {
y.Check(err)
}

sort.Slice(keyValues, func(i, j int) bool {
return keyValues[i][0] < keyValues[j][0]
Expand Down

0 comments on commit 2d7c7b4

Please sign in to comment.