-
Notifications
You must be signed in to change notification settings - Fork 2
/
sticky.go
92 lines (83 loc) · 1.9 KB
/
sticky.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
package countish
import (
"math"
"math/rand"
)
var Rand = rand.Float64
var RandCoin = rand.Int31n
type StickySampler struct {
errorTolerance float64
support float64
S map[string]float64
R float64
failureProb float64
N float64
t float64
RequiredSamples int
}
func NewSampler(support, errorTolerance, failureProb float64) *StickySampler {
twoT := 2 / errorTolerance * math.Log(1/(support*failureProb))
return &StickySampler{
errorTolerance: errorTolerance,
support: support,
R: 1,
failureProb: failureProb,
t: twoT,
RequiredSamples: int(twoT),
S: make(map[string]float64),
}
}
const sucessful = 0
func (s *StickySampler) prune() {
for key, val := range s.S {
// repeatedly toss coin
// until coin toss is successful.
// todo this can probably be derived
// by looking at how close to 0
// a number in [0, 1) is.
for {
if RandCoin(2) == sucessful {
break
}
// diminish by one for every
// unsucessful outcome
val--
// delete if needed
if val <= 0 {
delete(s.S, key)
} else {
s.S[key] = val
}
}
}
}
// ItemsAboveThreshold returns a list of items that occur more than threshold, along
// with their frequencies. threshold is in the range [0,1]
func (s *StickySampler) ItemsAboveThreshold(threshold float64) []Entry {
var results []Entry
for key, f := range s.S {
if f >= (threshold-s.errorTolerance)*s.N {
results = append(results, Entry{Key: key, Frequency: f/s.N + s.support})
}
}
return results
}
// Observe records a new sample
func (s *StickySampler) Observe(key string) {
s.N++
count := s.N
if count > s.t {
s.t *= 2
s.R *= 2
s.prune()
}
if _, exists := s.S[key]; !exists {
// determine if value should be sampled
shouldSample := Rand() <= 1/s.R
if !shouldSample {
return
}
}
s.S[key]++
return
}