-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathkde.py
72 lines (58 loc) · 2.25 KB
/
kde.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
An implementation of the kde bandwidth selection method outlined in:
Z. I. Botev, J. F. Grotowski, and D. P. Kroese. Kernel density
estimation via diffusion. The Annals of Statistics, 38(5):2916-2957, 2010.
Based on the implementation in Matlab by Zdravko Botev.
Daniel B. Smith, PhD
Updated 1-23-2013
"""
from __future__ import division
import scipy as sci
import scipy.optimize
import scipy.fftpack
def kde(data, N=None, MIN=None, MAX=None):
# Parameters to set up the mesh on which to calculate
N = 2**14 if N is None else int(2**sci.ceil(sci.log2(N)))
if MIN is None or MAX is None:
minimum = min(data)
maximum = max(data)
Range = maximum - minimum
MIN = minimum - Range/10 if MIN is None else MIN
MAX = maximum + Range/10 if MAX is None else MAX
# Range of the data
R = MAX-MIN
# Histogram the data to get a crude first approximation of the density
M = len(data)
DataHist, bins = sci.histogram(data, bins=N, range=(MIN,MAX))
DataHist = DataHist/M
DCTData = scipy.fftpack.dct(DataHist, norm=None)
I = [iN*iN for iN in xrange(1, N)]
SqDCTData = (DCTData[1:]/2)**2
# The fixed point calculation finds the bandwidth = t_star
guess = 0.1
try:
t_star = scipy.optimize.brentq(fixed_point, 0, guess,
args=(M, I, SqDCTData))
except ValueError:
print 'Oops!'
return None
# Smooth the DCTransformed data using t_star
SmDCTData = DCTData*sci.exp(-sci.arange(N)**2*sci.pi**2*t_star/2)
# Inverse DCT to get density
density = scipy.fftpack.idct(SmDCTData, norm=None)*N/R
mesh = [(bins[i]+bins[i+1])/2 for i in xrange(N)]
bandwidth = sci.sqrt(t_star)*R
density = density/sci.trapz(density, mesh)
return bandwidth, mesh, density
def fixed_point(t, M, I, a2):
l=7
I = sci.float128(I)
M = sci.float128(M)
a2 = sci.float128(a2)
f = 2*sci.pi**(2*l)*sci.sum(I**l*a2*sci.exp(-I*sci.pi**2*t))
for s in range(l, 1, -1):
K0 = sci.prod(xrange(1, 2*s, 2))/sci.sqrt(2*sci.pi)
const = (1 + (1/2)**(s + 1/2))/3
time=(2*const*K0/M/f)**(2/(3+2*s))
f=2*sci.pi**(2*s)*sci.sum(I**s*a2*sci.exp(-I*sci.pi**2*time))
return t-(2*M*sci.sqrt(sci.pi)*f)**(-2/5)