Skip to content

Commit 99d1bbb

Browse files
authored
default to 128bit hashing for collision checks (#98)
* 128bit hashing by default * remove unused, add documentation * use config func if provided * test fix * fix review comments, add todo for memhash128 * fix
1 parent 1eea1b1 commit 99d1bbb

File tree

10 files changed

+349
-338
lines changed

10 files changed

+349
-338
lines changed

README.md

+6-12
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ Ristretto is usable but still under active development. We expect it to be produ
3838
* [OnEvict](#Config)
3939
* [KeyToHash](#Config)
4040
* [Cost](#Config)
41-
* [Hashes](#Config)
4241
* [Benchmarks](#Benchmarks)
4342
* [Hit Ratios](#Hit-Ratios)
4443
* [Search](#Search)
@@ -109,14 +108,18 @@ If for some reason you see Get performance decreasing with lots of contention (y
109108

110109
Metrics is true when you want real-time logging of a variety of stats. This is a Config flag because enabling it incurs a 10% throughput performance overhead.
111110

112-
**OnEvict** `func(keyHash uint64, value interface{}, cost int64)`
111+
**OnEvict** `func(key, conflict uint64, value interface{}, cost int64)`
113112

114113
OnEvict is called for every eviction.
115114

116-
**KeyToHash** `func(key interface{}) uint64`
115+
**KeyToHash** `func(key interface{}) (uint64, uint64)`
117116

118117
KeyToHash is the hashing algorithm used for every key. If this is nil, Ristretto has a variety of [defaults depending on the underlying interface type](https://github.com/dgraph-io/ristretto/blob/master/z/z.go#L19-L41).
119118

119+
Note that if you want 128bit hashes you should populate both returned `uint64` values,
120+
otherwise just fill the first `uint64` (leaving the second as 0) and it will behave like
121+
any 64bit hash.
122+
120123
**Cost** `func(value interface{}) int64`
121124

122125
Cost is an optional function you can pass to the Config in order to evaluate
@@ -129,15 +132,6 @@ To signal to Ristretto that you'd like to use this Cost function:
129132
1. Set the Cost field to a non-nil function.
130133
2. When calling Set for new items or item updates, use a `cost` of 0.
131134

132-
**Hashes** `uint8`
133-
134-
Hashes is the number of 64-bit hashes to chain and use as unique identifiers.
135-
For example, if Hashes is 2, Ristretto will use 128-bit hashes to verify and
136-
protect against collisions. If Hashes is 3, Ristretto will use 192-bit hashes,
137-
etc.
138-
139-
If this value is 0 or 1, 64-bit hashes will be used.
140-
141135
## Benchmarks
142136

143137
The benchmarks can be found in https://github.com/dgraph-io/benchmarks/tree/master/cachebench/ristretto.

cache.go

+38-47
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ type Cache struct {
4848
// contention
4949
setBuf chan *item
5050
// onEvict is called for item evictions
51-
onEvict func(uint64, interface{}, int64)
51+
onEvict func(uint64, uint64, interface{}, int64)
5252
// KeyToHash function is used to customize the key hashing algorithm.
5353
// Each key will be hashed using the provided function. If keyToHash value
5454
// is not set, the default keyToHash function is used.
55-
keyToHash func(interface{}, uint8) uint64
55+
keyToHash func(interface{}) (uint64, uint64)
5656
// stop is used to stop the processItems goroutine
5757
stop chan struct{}
5858
// cost calculates cost from a value
@@ -94,23 +94,15 @@ type Config struct {
9494
Metrics bool
9595
// OnEvict is called for every eviction and passes the hashed key, value,
9696
// and cost to the function.
97-
OnEvict func(key uint64, value interface{}, cost int64)
97+
OnEvict func(key, conflict uint64, value interface{}, cost int64)
9898
// KeyToHash function is used to customize the key hashing algorithm.
9999
// Each key will be hashed using the provided function. If keyToHash value
100100
// is not set, the default keyToHash function is used.
101-
KeyToHash func(key interface{}, seed uint8) uint64
101+
KeyToHash func(key interface{}) (uint64, uint64)
102102
// Cost evaluates a value and outputs a corresponding cost. This function
103103
// is run after Set is called for a new item or an item update with a cost
104104
// param of 0.
105105
Cost func(value interface{}) int64
106-
// Hashes is the number of 64-bit hashes to chain and use as each item's
107-
// unique identifier. For example, setting Hashes to 2 will set internal
108-
// keys to 128-bits and therefore very little probability of colliding with
109-
// another key-value item in the cache. To just use 64-bit keys, set this
110-
// value to 0 or 1.
111-
//
112-
// The larger this value is, the worse throughput performance will be.
113-
Hashes uint8
114106
}
115107

116108
type itemFlag byte
@@ -123,11 +115,11 @@ const (
123115

124116
// item is passed to setBuf so items can eventually be added to the cache
125117
type item struct {
126-
flag itemFlag
127-
key interface{}
128-
keyHash uint64
129-
value interface{}
130-
cost int64
118+
flag itemFlag
119+
key uint64
120+
conflict uint64
121+
value interface{}
122+
cost int64
131123
}
132124

133125
// NewCache returns a new Cache instance and a configuration error, if any.
@@ -142,7 +134,7 @@ func NewCache(config *Config) (*Cache, error) {
142134
}
143135
policy := newPolicy(config.NumCounters, config.MaxCost)
144136
cache := &Cache{
145-
store: newStore(config.Hashes),
137+
store: newStore(),
146138
policy: policy,
147139
getBuf: newRingBuffer(policy, config.BufferItems),
148140
setBuf: make(chan *item, setBufSize),
@@ -171,13 +163,13 @@ func (c *Cache) Get(key interface{}) (interface{}, bool) {
171163
if c == nil || key == nil {
172164
return nil, false
173165
}
174-
hashed := c.keyToHash(key, 0)
175-
c.getBuf.Push(hashed)
176-
value, ok := c.store.Get(hashed, key)
166+
keyHash, conflictHash := c.keyToHash(key)
167+
c.getBuf.Push(keyHash)
168+
value, ok := c.store.Get(keyHash, conflictHash)
177169
if ok {
178-
c.Metrics.add(hit, hashed, 1)
170+
c.Metrics.add(hit, keyHash, 1)
179171
} else {
180-
c.Metrics.add(miss, hashed, 1)
172+
c.Metrics.add(miss, keyHash, 1)
181173
}
182174
return value, ok
183175
}
@@ -195,24 +187,25 @@ func (c *Cache) Set(key, value interface{}, cost int64) bool {
195187
if c == nil || key == nil {
196188
return false
197189
}
190+
keyHash, conflictHash := c.keyToHash(key)
198191
i := &item{
199-
flag: itemNew,
200-
key: key,
201-
keyHash: c.keyToHash(key, 0),
202-
value: value,
203-
cost: cost,
192+
flag: itemNew,
193+
key: keyHash,
194+
conflict: conflictHash,
195+
value: value,
196+
cost: cost,
204197
}
205198
// attempt to immediately update hashmap value and set flag to update so the
206199
// cost is eventually updated
207-
if c.store.Update(i.keyHash, i.key, i.value) {
200+
if c.store.Update(keyHash, conflictHash, i.value) {
208201
i.flag = itemUpdate
209202
}
210203
// attempt to send item to policy
211204
select {
212205
case c.setBuf <- i:
213206
return true
214207
default:
215-
c.Metrics.add(dropSets, i.keyHash, 1)
208+
c.Metrics.add(dropSets, keyHash, 1)
216209
return false
217210
}
218211
}
@@ -222,10 +215,11 @@ func (c *Cache) Del(key interface{}) {
222215
if c == nil || key == nil {
223216
return
224217
}
218+
keyHash, conflictHash := c.keyToHash(key)
225219
c.setBuf <- &item{
226-
flag: itemDelete,
227-
key: key,
228-
keyHash: c.keyToHash(key, 0),
220+
flag: itemDelete,
221+
key: keyHash,
222+
conflict: conflictHash,
229223
}
230224
}
231225

@@ -268,28 +262,25 @@ func (c *Cache) processItems() {
268262
}
269263
switch i.flag {
270264
case itemNew:
271-
victims, added := c.policy.Add(i.keyHash, i.cost)
265+
victims, added := c.policy.Add(i.key, i.cost)
272266
if added {
273-
// item was accepted by the policy, so add to the hashmap
274-
c.store.Set(i.keyHash, i.key, i.value)
275-
c.Metrics.add(keyAdd, i.keyHash, 1)
276-
c.Metrics.add(costAdd, i.keyHash, uint64(i.cost))
267+
c.store.Set(i.key, i.conflict, i.value)
268+
c.Metrics.add(keyAdd, i.key, 1)
269+
c.Metrics.add(costAdd, i.key, uint64(i.cost))
277270
}
278271
for _, victim := range victims {
279-
// force get with no collision checking because
280-
// we don't have access to the victim's key
281-
victim.value = c.store.Del(victim.keyHash, nil)
272+
victim.conflict, victim.value = c.store.Del(victim.key, 0)
282273
if c.onEvict != nil {
283-
c.onEvict(victim.keyHash, victim.value, victim.cost)
274+
c.onEvict(victim.key, victim.conflict, victim.value, victim.cost)
284275
}
285-
c.Metrics.add(keyEvict, victim.keyHash, 1)
286-
c.Metrics.add(costEvict, victim.keyHash, uint64(victim.cost))
276+
c.Metrics.add(keyEvict, victim.key, 1)
277+
c.Metrics.add(costEvict, victim.key, uint64(victim.cost))
287278
}
288279
case itemUpdate:
289-
c.policy.Update(i.keyHash, i.cost)
280+
c.policy.Update(i.key, i.cost)
290281
case itemDelete:
291-
c.policy.Del(i.keyHash)
292-
c.store.Del(i.keyHash, i.key)
282+
c.policy.Del(i.key)
283+
c.store.Del(i.key, i.conflict)
293284
}
294285
case <-c.stop:
295286
return

0 commit comments

Comments
 (0)