1 file changed: +16 -10 lines changed
Original file line number | Diff line number | Diff line change
1717
1818import sys
1919import random
20+ import math
2021
2122
2223class RDDSamplerBase (object ):
@@ -37,16 +38,21 @@ def getUniformSample(self):
3738 return self ._random .random ()
3839
def getPoissonSample(self, mean):
    """Draw one sample from a Poisson distribution with the given mean.

    Two strategies are used, chosen by the size of ``mean``:

    * ``mean < 20.0``: Knuth's multiplicative algorithm (see
      http://en.wikipedia.org/wiki/Poisson_distribution). Costs one
      ``math.exp`` call plus k+1 calls to ``self._random.random()``.
    * otherwise: the same idea carried out in the log domain, counting
      how many exponential inter-arrival gaps fit into the unit
      interval. Costs k+1 calls to ``self._random.expovariate()``.

    :param mean: expected value of the Poisson distribution.
    :return: the sampled count ``k`` as a non-negative int.

    Reads ``self._random``, which must provide ``random()`` and
    ``expovariate()`` (the interface of ``random.Random``).
    """
    if mean < 20.0:
        # Multiply uniforms together until the running product falls to
        # exp(-mean) or below; the number of extra factors needed is the
        # sample.
        threshold = math.exp(-mean)
        product = self._random.random()
        k = 0
        while product > threshold:
            k += 1
            product *= self._random.random()
        return k

    # Log-domain variant. ``expovariate`` takes the *rate* (1 / mean gap),
    # so passing ``mean`` yields gaps averaging 1/mean; the number of
    # arrivals landing before time 1.0 is then Poisson(mean).
    elapsed = self._random.expovariate(mean)
    k = 0
    while elapsed < 1.0:
        k += 1
        elapsed += self._random.expovariate(mean)
    return k
5056
def func(self, split, iterator):
    """Abstract hook; this base implementation always raises.

    NOTE(review): presumably ``split`` identifies a partition and
    ``iterator`` yields its elements, with subclasses overriding this
    method to do the actual sampling -- confirm against the callers.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError
You can’t perform that action at this time.
0 commit comments