Skip to content

Commit

Permalink
default to 128bit hashing for collision checks (#98)
Browse files Browse the repository at this point in the history
* 128bit hashing by default

* remove unused, add documentation

* use config func if provided

* test fix

* fix review comments, add todo for memhash128

* fix
  • Loading branch information
karlmcguire authored Nov 14, 2019
1 parent 1eea1b1 commit 99d1bbb
Show file tree
Hide file tree
Showing 10 changed files with 349 additions and 338 deletions.
18 changes: 6 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ Ristretto is usable but still under active development. We expect it to be produ
* [OnEvict](#Config)
* [KeyToHash](#Config)
* [Cost](#Config)
* [Hashes](#Config)
* [Benchmarks](#Benchmarks)
* [Hit Ratios](#Hit-Ratios)
* [Search](#Search)
Expand Down Expand Up @@ -109,14 +108,18 @@ If for some reason you see Get performance decreasing with lots of contention (y

Metrics is true when you want real-time logging of a variety of stats. This is a Config flag because there's a 10% throughput performance overhead.

**OnEvict** `func(keyHash uint64, value interface{}, cost int64)`
**OnEvict** `func(key, conflict uint64, value interface{}, cost int64)`

OnEvict is called for every eviction.

**KeyToHash** `func(key interface{}) uint64`
**KeyToHash** `func(key interface{}) (uint64, uint64)`

KeyToHash is the hashing algorithm used for every key. If this is nil, Ristretto has a variety of [defaults depending on the underlying interface type](https://github.com/dgraph-io/ristretto/blob/master/z/z.go#L19-L41).

Note that if you want 128-bit hashes you should populate both returned
`uint64` values; otherwise just fill the first value and return `0` for the
second, and it will behave like any 64-bit hash.

**Cost** `func(value interface{}) int64`

Cost is an optional function you can pass to the Config in order to evaluate
Expand All @@ -129,15 +132,6 @@ To signal to Ristretto that you'd like to use this Cost function:
1. Set the Cost field to a non-nil function.
2. When calling Set for new items or item updates, use a `cost` of 0.

**Hashes** `uint8`

Hashes is the number of 64-bit hashes to chain and use as unique identifiers.
For example, if Hashes is 2, Ristretto will use 128-bit hashes to verify and
protect against collisions. If Hashes is 3, Ristretto will use 192-bit hashes,
etc.

If this value is 0 or 1, 64-bit hashes will be used.

## Benchmarks

The benchmarks can be found in https://github.com/dgraph-io/benchmarks/tree/master/cachebench/ristretto.
Expand Down
85 changes: 38 additions & 47 deletions cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ type Cache struct {
// contention
setBuf chan *item
// onEvict is called for item evictions
onEvict func(uint64, interface{}, int64)
onEvict func(uint64, uint64, interface{}, int64)
// KeyToHash function is used to customize the key hashing algorithm.
// Each key will be hashed using the provided function. If keyToHash value
// is not set, the default keyToHash function is used.
keyToHash func(interface{}, uint8) uint64
keyToHash func(interface{}) (uint64, uint64)
// stop is used to stop the processItems goroutine
stop chan struct{}
// cost calculates cost from a value
Expand Down Expand Up @@ -94,23 +94,15 @@ type Config struct {
Metrics bool
// OnEvict is called for every eviction and passes the hashed key, conflict
// hash, value, and cost to the function.
OnEvict func(key uint64, value interface{}, cost int64)
OnEvict func(key, conflict uint64, value interface{}, cost int64)
// KeyToHash function is used to customize the key hashing algorithm.
// Each key will be hashed using the provided function. If keyToHash value
// is not set, the default keyToHash function is used.
KeyToHash func(key interface{}, seed uint8) uint64
KeyToHash func(key interface{}) (uint64, uint64)
// Cost evaluates a value and outputs a corresponding cost. This function
// is run after Set is called for a new item or an item update with a cost
// param of 0.
Cost func(value interface{}) int64
// Hashes is the number of 64-bit hashes to chain and use as each item's
// unique identifier. For example, setting Hashes to 2 will set internal
// keys to 128-bits and therefore very little probability of colliding with
// another key-value item in the cache. To just use 64-bit keys, set this
// value to 0 or 1.
//
// The larger this value is, the worse throughput performance will be.
Hashes uint8
}

type itemFlag byte
Expand All @@ -123,11 +115,11 @@ const (

// item is passed to setBuf so items can eventually be added to the cache
type item struct {
flag itemFlag
key interface{}
keyHash uint64
value interface{}
cost int64
flag itemFlag
key uint64
conflict uint64
value interface{}
cost int64
}

// NewCache returns a new Cache instance and a configuration error, if any.
Expand All @@ -142,7 +134,7 @@ func NewCache(config *Config) (*Cache, error) {
}
policy := newPolicy(config.NumCounters, config.MaxCost)
cache := &Cache{
store: newStore(config.Hashes),
store: newStore(),
policy: policy,
getBuf: newRingBuffer(policy, config.BufferItems),
setBuf: make(chan *item, setBufSize),
Expand Down Expand Up @@ -171,13 +163,13 @@ func (c *Cache) Get(key interface{}) (interface{}, bool) {
if c == nil || key == nil {
return nil, false
}
hashed := c.keyToHash(key, 0)
c.getBuf.Push(hashed)
value, ok := c.store.Get(hashed, key)
keyHash, conflictHash := c.keyToHash(key)
c.getBuf.Push(keyHash)
value, ok := c.store.Get(keyHash, conflictHash)
if ok {
c.Metrics.add(hit, hashed, 1)
c.Metrics.add(hit, keyHash, 1)
} else {
c.Metrics.add(miss, hashed, 1)
c.Metrics.add(miss, keyHash, 1)
}
return value, ok
}
Expand All @@ -195,24 +187,25 @@ func (c *Cache) Set(key, value interface{}, cost int64) bool {
if c == nil || key == nil {
return false
}
keyHash, conflictHash := c.keyToHash(key)
i := &item{
flag: itemNew,
key: key,
keyHash: c.keyToHash(key, 0),
value: value,
cost: cost,
flag: itemNew,
key: keyHash,
conflict: conflictHash,
value: value,
cost: cost,
}
// attempt to immediately update hashmap value and set flag to update so the
// cost is eventually updated
if c.store.Update(i.keyHash, i.key, i.value) {
if c.store.Update(keyHash, conflictHash, i.value) {
i.flag = itemUpdate
}
// attempt to send item to policy
select {
case c.setBuf <- i:
return true
default:
c.Metrics.add(dropSets, i.keyHash, 1)
c.Metrics.add(dropSets, keyHash, 1)
return false
}
}
Expand All @@ -222,10 +215,11 @@ func (c *Cache) Del(key interface{}) {
if c == nil || key == nil {
return
}
keyHash, conflictHash := c.keyToHash(key)
c.setBuf <- &item{
flag: itemDelete,
key: key,
keyHash: c.keyToHash(key, 0),
flag: itemDelete,
key: keyHash,
conflict: conflictHash,
}
}

Expand Down Expand Up @@ -268,28 +262,25 @@ func (c *Cache) processItems() {
}
switch i.flag {
case itemNew:
victims, added := c.policy.Add(i.keyHash, i.cost)
victims, added := c.policy.Add(i.key, i.cost)
if added {
// item was accepted by the policy, so add to the hashmap
c.store.Set(i.keyHash, i.key, i.value)
c.Metrics.add(keyAdd, i.keyHash, 1)
c.Metrics.add(costAdd, i.keyHash, uint64(i.cost))
c.store.Set(i.key, i.conflict, i.value)
c.Metrics.add(keyAdd, i.key, 1)
c.Metrics.add(costAdd, i.key, uint64(i.cost))
}
for _, victim := range victims {
// force get with no collision checking because
// we don't have access to the victim's key
victim.value = c.store.Del(victim.keyHash, nil)
victim.conflict, victim.value = c.store.Del(victim.key, 0)
if c.onEvict != nil {
c.onEvict(victim.keyHash, victim.value, victim.cost)
c.onEvict(victim.key, victim.conflict, victim.value, victim.cost)
}
c.Metrics.add(keyEvict, victim.keyHash, 1)
c.Metrics.add(costEvict, victim.keyHash, uint64(victim.cost))
c.Metrics.add(keyEvict, victim.key, 1)
c.Metrics.add(costEvict, victim.key, uint64(victim.cost))
}
case itemUpdate:
c.policy.Update(i.keyHash, i.cost)
c.policy.Update(i.key, i.cost)
case itemDelete:
c.policy.Del(i.keyHash)
c.store.Del(i.keyHash, i.key)
c.policy.Del(i.key)
c.store.Del(i.key, i.conflict)
}
case <-c.stop:
return
Expand Down
Loading

0 comments on commit 99d1bbb

Please sign in to comment.