Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

default to 128bit hashing for collision checks #98

Merged
merged 9 commits into from
Nov 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 6 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ Ristretto is usable but still under active development. We expect it to be produ
* [OnEvict](#Config)
* [KeyToHash](#Config)
* [Cost](#Config)
* [Hashes](#Config)
* [Benchmarks](#Benchmarks)
* [Hit Ratios](#Hit-Ratios)
* [Search](#Search)
Expand Down Expand Up @@ -109,14 +108,18 @@ If for some reason you see Get performance decreasing with lots of contention (y

Metrics is true when you want real-time logging of a variety of stats. This is a Config flag because enabling it adds a 10% throughput performance overhead.

**OnEvict** `func(keyHash uint64, value interface{}, cost int64)`
**OnEvict** `func(key, conflict uint64, value interface{}, cost int64)`

OnEvict is called for every eviction.

**KeyToHash** `func(key interface{}) uint64`
**KeyToHash** `func(key interface{}) (uint64, uint64)`

KeyToHash is the hashing algorithm used for every key. If this is nil, Ristretto has a variety of [defaults depending on the underlying interface type](https://github.com/dgraph-io/ristretto/blob/master/z/z.go#L19-L41).

Note that if you want 128-bit hashes you should return both `uint64` values;
otherwise, just return your 64-bit hash as the first value and `0` as the
second, and it will behave like any 64-bit hash.

**Cost** `func(value interface{}) int64`

Cost is an optional function you can pass to the Config in order to evaluate
Expand All @@ -129,15 +132,6 @@ To signal to Ristretto that you'd like to use this Cost function:
1. Set the Cost field to a non-nil function.
2. When calling Set for new items or item updates, use a `cost` of 0.

**Hashes** `uint8`

Hashes is the number of 64-bit hashes to chain and use as unique identifiers.
For example, if Hashes is 2, Ristretto will use 128-bit hashes to verify and
protect against collisions. If Hashes is 3, Ristretto will use 192-bit hashes,
etc.

If this value is 0 or 1, 64-bit hashes will be used.

## Benchmarks

The benchmarks can be found in https://github.com/dgraph-io/benchmarks/tree/master/cachebench/ristretto.
Expand Down
85 changes: 38 additions & 47 deletions cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ type Cache struct {
// contention
setBuf chan *item
// onEvict is called for item evictions
onEvict func(uint64, interface{}, int64)
onEvict func(uint64, uint64, interface{}, int64)
// KeyToHash function is used to customize the key hashing algorithm.
// Each key will be hashed using the provided function. If keyToHash value
// is not set, the default keyToHash function is used.
keyToHash func(interface{}, uint8) uint64
keyToHash func(interface{}) (uint64, uint64)
// stop is used to stop the processItems goroutine
stop chan struct{}
// cost calculates cost from a value
Expand Down Expand Up @@ -94,23 +94,15 @@ type Config struct {
Metrics bool
// OnEvict is called for every eviction and passes the key hash, conflict
// hash, value, and cost to the function.
OnEvict func(key uint64, value interface{}, cost int64)
OnEvict func(key, conflict uint64, value interface{}, cost int64)
// KeyToHash function is used to customize the key hashing algorithm.
// Each key will be hashed using the provided function. If keyToHash value
// is not set, the default keyToHash function is used.
KeyToHash func(key interface{}, seed uint8) uint64
KeyToHash func(key interface{}) (uint64, uint64)
// Cost evaluates a value and outputs a corresponding cost. This function
// is run after Set is called for a new item or an item update with a cost
// param of 0.
Cost func(value interface{}) int64
// Hashes is the number of 64-bit hashes to chain and use as each item's
// unique identifier. For example, setting Hashes to 2 will set internal
// keys to 128-bits and therefore very little probability of colliding with
// another key-value item in the cache. To just use 64-bit keys, set this
// value to 0 or 1.
//
// The larger this value is, the worse throughput performance will be.
Hashes uint8
}

type itemFlag byte
Expand All @@ -123,11 +115,11 @@ const (

// item is passed to setBuf so items can eventually be added to the cache
type item struct {
flag itemFlag
key interface{}
keyHash uint64
value interface{}
cost int64
flag itemFlag
key uint64
conflict uint64
value interface{}
cost int64
}

// NewCache returns a new Cache instance and any configuration errors, if any.
Expand All @@ -142,7 +134,7 @@ func NewCache(config *Config) (*Cache, error) {
}
policy := newPolicy(config.NumCounters, config.MaxCost)
cache := &Cache{
store: newStore(config.Hashes),
store: newStore(),
policy: policy,
getBuf: newRingBuffer(policy, config.BufferItems),
setBuf: make(chan *item, setBufSize),
Expand Down Expand Up @@ -171,13 +163,13 @@ func (c *Cache) Get(key interface{}) (interface{}, bool) {
if c == nil || key == nil {
return nil, false
}
hashed := c.keyToHash(key, 0)
c.getBuf.Push(hashed)
value, ok := c.store.Get(hashed, key)
keyHash, conflictHash := c.keyToHash(key)
c.getBuf.Push(keyHash)
value, ok := c.store.Get(keyHash, conflictHash)
if ok {
c.Metrics.add(hit, hashed, 1)
c.Metrics.add(hit, keyHash, 1)
} else {
c.Metrics.add(miss, hashed, 1)
c.Metrics.add(miss, keyHash, 1)
}
return value, ok
}
Expand All @@ -195,24 +187,25 @@ func (c *Cache) Set(key, value interface{}, cost int64) bool {
if c == nil || key == nil {
return false
}
keyHash, conflictHash := c.keyToHash(key)
i := &item{
flag: itemNew,
key: key,
keyHash: c.keyToHash(key, 0),
value: value,
cost: cost,
flag: itemNew,
key: keyHash,
conflict: conflictHash,
value: value,
cost: cost,
}
// attempt to immediately update hashmap value and set flag to update so the
// cost is eventually updated
if c.store.Update(i.keyHash, i.key, i.value) {
if c.store.Update(keyHash, conflictHash, i.value) {
i.flag = itemUpdate
}
// attempt to send item to policy
select {
case c.setBuf <- i:
return true
default:
c.Metrics.add(dropSets, i.keyHash, 1)
c.Metrics.add(dropSets, keyHash, 1)
return false
}
}
Expand All @@ -222,10 +215,11 @@ func (c *Cache) Del(key interface{}) {
if c == nil || key == nil {
return
}
keyHash, conflictHash := c.keyToHash(key)
c.setBuf <- &item{
flag: itemDelete,
key: key,
keyHash: c.keyToHash(key, 0),
flag: itemDelete,
key: keyHash,
conflict: conflictHash,
}
}

Expand Down Expand Up @@ -268,28 +262,25 @@ func (c *Cache) processItems() {
}
switch i.flag {
case itemNew:
victims, added := c.policy.Add(i.keyHash, i.cost)
victims, added := c.policy.Add(i.key, i.cost)
if added {
// item was accepted by the policy, so add to the hashmap
c.store.Set(i.keyHash, i.key, i.value)
c.Metrics.add(keyAdd, i.keyHash, 1)
c.Metrics.add(costAdd, i.keyHash, uint64(i.cost))
c.store.Set(i.key, i.conflict, i.value)
c.Metrics.add(keyAdd, i.key, 1)
c.Metrics.add(costAdd, i.key, uint64(i.cost))
}
for _, victim := range victims {
// force get with no collision checking because
// we don't have access to the victim's key
victim.value = c.store.Del(victim.keyHash, nil)
victim.conflict, victim.value = c.store.Del(victim.key, 0)
if c.onEvict != nil {
c.onEvict(victim.keyHash, victim.value, victim.cost)
c.onEvict(victim.key, victim.conflict, victim.value, victim.cost)
}
c.Metrics.add(keyEvict, victim.keyHash, 1)
c.Metrics.add(costEvict, victim.keyHash, uint64(victim.cost))
c.Metrics.add(keyEvict, victim.key, 1)
c.Metrics.add(costEvict, victim.key, uint64(victim.cost))
}
case itemUpdate:
c.policy.Update(i.keyHash, i.cost)
c.policy.Update(i.key, i.cost)
case itemDelete:
c.policy.Del(i.keyHash)
c.store.Del(i.keyHash, i.key)
c.policy.Del(i.key)
c.store.Del(i.key, i.conflict)
}
case <-c.stop:
return
Expand Down
Loading