Skip to content

Commit da78d5e

Browse files
committed
bloomfilter: load offset values directly
                      │ baseline.log │             field.log              │
                      │    sec/op    │   sec/op     vs base               │
FilterAdd/Add-32        34.23n ± 27%   24.99n ± 3%  -26.99% (p=0.000 n=10)
FilterAdd/Contains-32   26.53n ± 18%   25.72n ± 3%        ~ (p=0.724 n=10)
geomean                 30.14n         25.35n       -15.87%

Change-Id: I0c9e186700f8b94539a7f18ff73a6c525f1a8ae4
1 parent cc5da98 commit da78d5e

File tree

1 file changed

+34
-29
lines changed

1 file changed

+34
-29
lines changed

bloomfilter/filter.go

+34-29
Original file line number | Diff line number | Diff line change
@@ -36,17 +36,23 @@ type Filter struct {
3636
hashCount byte
3737
table []byte
3838

39-
tableSize fastdiv.Uint64
39+
offset byte
40+
rangeOffset byte
41+
tableSize fastdiv.Uint64
4042
}
4143

4244
// NewExplicit returns a new filter with the explicit seed and parameters.
4345
func NewExplicit(seed, hashCount byte, sizeInBytes int) *Filter {
46+
offset, rangeOffset := initialConditions(seed)
47+
4448
return &Filter{
4549
seed: seed,
4650
hashCount: hashCount,
4751
table: make([]byte, sizeInBytes),
4852

49-
tableSize: fastdiv.NewUint64(uint64(sizeInBytes)),
53+
offset: offset,
54+
rangeOffset: rangeOffset,
55+
tableSize: fastdiv.NewUint64(uint64(sizeInBytes)),
5056
}
5157
}
5258

@@ -82,59 +88,57 @@ func (filter *Filter) Add(pieceID storj.PieceID) {
8288
copy(id[:], pieceID[:])
8389
copy(id[len(pieceID):], pieceID[:])
8490

85-
offset, rangeOffset := initialConditions(filter.seed)
86-
for k := byte(0); k < filter.hashCount; k++ {
91+
offset, rangeOffset := filter.offset, filter.rangeOffset
92+
for h := int(filter.hashCount); h > 0; h-- {
8793
hash, bit := binary.LittleEndian.Uint64(id[offset:offset+8]), id[offset+8]
88-
offset = (offset + rangeOffset) % len(storj.PieceID{})
89-
9094
bucket := filter.tableSize.Mod(hash)
9195
filter.table[bucket] |= 1 << (bit % 8)
96+
offset = (offset + rangeOffset) % byte(len(storj.PieceID{}))
9297
}
9398
}
9499

95-
// AddFilter adds the given filter into the receiver. The filters
96-
// must have a matching seed and parameters.
97-
func (filter *Filter) AddFilter(operand *Filter) error {
98-
switch {
99-
case filter.seed != operand.seed:
100-
return errs.New("cannot merge: mismatched seed: expected %d but got %d", filter.seed, operand.seed)
101-
case filter.hashCount != operand.hashCount:
102-
return errs.New("cannot merge: mismatched hash count: expected %d but got %d", filter.hashCount, operand.hashCount)
103-
case len(filter.table) != len(operand.table):
104-
return errs.New("cannot merge: mismatched table size: expected %d but got %d", len(filter.table), len(operand.table))
105-
}
106-
for i := 0; i < len(filter.table); i++ {
107-
filter.table[i] |= operand.table[i]
108-
}
109-
return nil
110-
}
111-
112100
// Contains returns true if pieceID may be in the set.
113101
func (filter *Filter) Contains(pieceID storj.PieceID) bool {
114102
var id [len(pieceID) * 2]byte
115103
copy(id[:], pieceID[:])
116104
copy(id[len(pieceID):], pieceID[:])
117105

118-
offset, rangeOffset := initialConditions(filter.seed)
106+
offset, rangeOffset := filter.offset, filter.rangeOffset
119107
for k := byte(0); k < filter.hashCount; k++ {
120108
hash, bit := binary.LittleEndian.Uint64(id[offset:offset+8]), id[offset+8]
121-
offset = (offset + rangeOffset) % len(storj.PieceID{})
122-
123109
bucket := filter.tableSize.Mod(hash)
124110
if filter.table[bucket]&(1<<(bit%8)) == 0 {
125111
return false
126112
}
113+
offset = (offset + rangeOffset) % byte(len(storj.PieceID{}))
127114
}
128115

129116
return true
130117
}
131118

132-
func initialConditions(seed byte) (initialOffset, rangeOffset int) {
133-
initialOffset = int(seed % 32)
134-
rangeOffset = int(rangeOffsets[int(seed/32)%len(rangeOffsets)])
119+
func initialConditions(seed byte) (initialOffset, rangeOffset byte) {
120+
initialOffset = seed % 32
121+
rangeOffset = rangeOffsets[int(seed/32)%len(rangeOffsets)]
135122
return initialOffset, rangeOffset
136123
}
137124

125+
// AddFilter adds the given filter into the receiver. The filters
126+
// must have a matching seed and parameters.
127+
func (filter *Filter) AddFilter(operand *Filter) error {
128+
switch {
129+
case filter.seed != operand.seed:
130+
return errs.New("cannot merge: mismatched seed: expected %d but got %d", filter.seed, operand.seed)
131+
case filter.hashCount != operand.hashCount:
132+
return errs.New("cannot merge: mismatched hash count: expected %d but got %d", filter.hashCount, operand.hashCount)
133+
case len(filter.table) != len(operand.table):
134+
return errs.New("cannot merge: mismatched table size: expected %d but got %d", len(filter.table), len(operand.table))
135+
}
136+
for i := 0; i < len(filter.table); i++ {
137+
filter.table[i] |= operand.table[i]
138+
}
139+
return nil
140+
}
141+
138142
// NewFromBytes decodes the filter from a sequence of bytes.
139143
//
140144
// Note: data will be referenced inside the table.
@@ -155,6 +159,7 @@ func NewFromBytes(data []byte) (*Filter, error) {
155159
return nil, errs.New("invalid hash count %d", filter.hashCount)
156160
}
157161

162+
filter.offset, filter.rangeOffset = initialConditions(filter.seed)
158163
filter.tableSize = fastdiv.NewUint64(uint64(len(filter.table)))
159164

160165
return filter, nil

0 commit comments

Comments
 (0)