Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,408 changes: 722 additions & 686 deletions docs/source/notebooks/dependent_density_regression.ipynb

Large diffs are not rendered by default.

41 changes: 28 additions & 13 deletions pymc3/distributions/mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,18 +158,33 @@ def random_choice(*args, **kwargs):
return np.random.choice(k, p=w, *args, **kwargs)

w = draw_values([self.w], point=point)[0]

comp_tmp = self._comp_samples(point=point, size=None)
if self.shape.size == 0:
distshape = np.asarray(np.broadcast(w, comp_tmp).shape)[..., :-1]
else:
distshape = self.shape
w_samples = generate_samples(random_choice,
w=w,
broadcast_shape=w.shape[:-1] or (1,),
dist_shape=self.shape,
dist_shape=distshape,
size=size).squeeze()
comp_samples = self._comp_samples(point=point, size=size)

if comp_samples.ndim > 1:
return np.squeeze(comp_samples[np.arange(w_samples.size), w_samples])
if (size is None) or (distshape.size == 0):
comp_samples = self._comp_samples(point=point, size=size)
if comp_samples.ndim > 1:
samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples])
else:
samples = np.squeeze(comp_samples[w_samples])
else:
return np.squeeze(comp_samples[w_samples])
samples = np.zeros((size,)+tuple(distshape))
for i in range(size):
w_tmp = w_samples[i, :]
comp_tmp = self._comp_samples(point=point, size=None)
if comp_tmp.ndim > 1:
samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp])
else:
samples[i, :] = np.squeeze(comp_tmp[w_tmp])

return samples


class NormalMixture(Mixture):
Expand Down Expand Up @@ -197,22 +212,22 @@ class NormalMixture(Mixture):
the component standard deviations
tau : array of floats
the component precisions
comp_shape : shape of the Normal component
notice that it should be different than the shape
of the mixture distribution, with one axis being
the number of components.

Note: You only have to pass in sd or tau, but not both.
"""

def __init__(self, w, mu, *args, **kwargs):
def __init__(self, w, mu, comp_shape=(), *args, **kwargs):
_, sd = get_tau_sd(tau=kwargs.pop('tau', None),
sd=kwargs.pop('sd', None))

distshape = np.broadcast(mu, sd).shape
self.mu = mu = tt.as_tensor_variable(mu)
self.sd = sd = tt.as_tensor_variable(sd)

if not distshape:
distshape = np.broadcast(mu.tag.test_value, sd.tag.test_value).shape

super(NormalMixture, self).__init__(w, Normal.dist(mu, sd=sd, shape=distshape),
super(NormalMixture, self).__init__(w, Normal.dist(mu, sd=sd, shape=comp_shape),
*args, **kwargs)

def _repr_latex_(self, name=None, dist=None):
Expand Down
66 changes: 64 additions & 2 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,17 +754,79 @@ def ref_rand(size, w, mu, sd):
pymc3_random(pm.NormalMixture, {'w': Simplex(2),
'mu': Domain([[.05, 2.5], [-5., 1.]], edges=(None, None)),
'sd': Domain([[1, 1], [1.5, 2.]], edges=(None, None))},
extra_args={'comp_shape': 2},
size=1000,
ref_rand=ref_rand)
pymc3_random(pm.NormalMixture, {'w': Simplex(3),
'mu': Domain([[-5., 1., 2.5]], edges=(None, None)),
'sd': Domain([[1.5, 2., 3.]], edges=(None, None))},
extra_args={'comp_shape': 3},
size=1000,
ref_rand=ref_rand)


def test_mixture_random_shape():
# test the shape broadcasting in mixture random
y = np.concatenate([nr.poisson(5, size=10),
nr.poisson(9, size=10)])
with pm.Model() as m:
comp0 = pm.Poisson.dist(mu=np.ones(2))
w0 = pm.Dirichlet('w0', a=np.ones(2))
like0 = pm.Mixture('like0',
w=w0,
comp_dists=comp0,
shape=y.shape,
observed=y)

comp1 = pm.Poisson.dist(mu=np.ones((20, 2)),
shape=(20, 2))
w1 = pm.Dirichlet('w1', a=np.ones(2))
like1 = pm.Mixture('like1',
w=w1,
comp_dists=comp1, observed=y)

comp2 = pm.Poisson.dist(mu=np.ones(2))
w2 = pm.Dirichlet('w2',
a=np.ones(2),
shape=(20, 2))
like2 = pm.Mixture('like2',
w=w2,
comp_dists=comp2,
observed=y)

comp3 = pm.Poisson.dist(mu=np.ones(2),
shape=(20, 2))
w3 = pm.Dirichlet('w3',
a=np.ones(2),
shape=(20, 2))
like3 = pm.Mixture('like3',
w=w3,
comp_dists=comp3,
observed=y)

rand0 = like0.distribution.random(m.test_point, size=100)
assert rand0.shape == (100, 20)

rand1 = like1.distribution.random(m.test_point, size=100)
assert rand1.shape == (100, 20)

rand2 = like2.distribution.random(m.test_point, size=100)
assert rand2.shape == (100, 20)

rand3 = like3.distribution.random(m.test_point, size=100)
assert rand3.shape == (100, 20)

with m:
ppc = pm.sample_ppc([m.test_point], samples=200)
assert ppc['like0'].shape == (200, 20)
assert ppc['like1'].shape == (200, 20)
assert ppc['like2'].shape == (200, 20)
assert ppc['like3'].shape == (200, 20)


def test_density_dist_with_random_sampleable():
with pm.Model() as model:
mu = pm.Normal('mu',0,1)
mu = pm.Normal('mu', 0, 1)
normal_dist = pm.Normal.dist(mu, 1)
pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100), random=normal_dist.random)
trace = pm.sample(100)
Expand All @@ -776,7 +838,7 @@ def test_density_dist_with_random_sampleable():

def test_density_dist_without_random_not_sampleable():
with pm.Model() as model:
mu = pm.Normal('mu',0,1)
mu = pm.Normal('mu', 0, 1)
normal_dist = pm.Normal.dist(mu, 1)
pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100))
trace = pm.sample(100)
Expand Down
34 changes: 32 additions & 2 deletions pymc3/tests/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,14 @@ def test_empty_model():
pm.sample()
error.match('any free variables')


def test_partial_trace_sample():
with pm.Model() as model:
a = pm.Normal('a', mu=0, sd=1)
b = pm.Normal('b', mu=0, sd=1)
trace = pm.sample(trace=[a])


@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
class TestNamedSampling(SeededTest):
def test_shared_named(self):
Expand Down Expand Up @@ -235,7 +237,34 @@ def test_normal_vector(self):
trace = pm.sample()

with model:
# test list input
# test list input
ppc0 = pm.sample_ppc([model.test_point], samples=10)
ppc = pm.sample_ppc(trace, samples=10, vars=[])
assert len(ppc) == 0
ppc = pm.sample_ppc(trace, samples=10, vars=[a])
assert 'a' in ppc
assert ppc['a'].shape == (10, 2)

ppc = pm.sample_ppc(trace, samples=10, vars=[a], size=4)
assert 'a' in ppc
assert ppc['a'].shape == (10, 4, 2)

def test_vector_observed(self):
# This test was initially created to test whether observedRVs
# can assert the shape automatically from the observed data.
# It can make sample_ppc correct for RVs similar to below (i.e.,
# some kind of broadcasting is involved). However, doing so makes
# the application with `theano.shared` array as observed data
# invalid (after the `.set_value` the RV shape could change).
with pm.Model() as model:
mu = pm.Normal('mu', mu=0, sd=1)
a = pm.Normal('a', mu=mu, sd=1,
shape=2, # necessary to make ppc sample correct
observed=np.array([0., 1.]))
trace = pm.sample()

with model:
# test list input
ppc0 = pm.sample_ppc([model.test_point], samples=10)
ppc = pm.sample_ppc(trace, samples=10, vars=[])
assert len(ppc) == 0
Expand All @@ -254,7 +283,7 @@ def test_sum_normal(self):
trace = pm.sample()

with model:
# test list input
# test list input
ppc0 = pm.sample_ppc([model.test_point], samples=10)
ppc = pm.sample_ppc(trace, samples=1000, vars=[b])
assert len(ppc) == 1
Expand All @@ -263,6 +292,7 @@ def test_sum_normal(self):
_, pval = stats.kstest(ppc['b'], stats.norm(scale=scale).cdf)
assert pval > 0.001


class TestSamplePPCW(SeededTest):
def test_sample_ppc_w(self):
data0 = np.random.normal(0, 1, size=500)
Expand Down