22 changes: 22 additions & 0 deletions pymc3/distributions/distribution.py
@@ -91,6 +91,28 @@ def _repr_latex_(self, name=None, dist=None):
"""Magic method name for IPython to use for LaTeX formatting."""
return None

def logp_nojac(self, *args, **kwargs):
"""Return the logp, but do not include a jacobian term for transforms.

If we use different parametrizations for the same distribution, we
need to add the determinant of the jacobian of the transformation
to make sure the densities still describe the same distribution.
However, MAP estimates are not invariant with respect to the
parametrization, so we need to exclude the jacobian terms in this case.

This function should be overridden in classes for transformed
distributions.
"""
return self.logp(*args, **kwargs)

def logp_sum(self, *args, **kwargs):
"""Return the sum of the logp values for the given observations.

Subclasses can use this to improve the speed of logp evaluations
if only the sum of the logp values is needed.
"""
return tt.sum(self.logp(*args, **kwargs))

__latex__ = _repr_latex_
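Note (not part of the diff): a minimal NumPy sketch of the claim in the `logp_nojac` docstring above, that the mode of a density is not invariant under reparametrization; the Exponential(1) example and the grid search are purely illustrative.

```python
import numpy as np

# Exponential(1) on x > 0: log p(x) = -x, mode at x = 0.
x = np.linspace(1e-6, 5.0, 200000)
print(x[np.argmax(-x)])                       # ~ 0

# Reparametrize with y = log(x). Adding the jacobian term (+y) keeps it the
# same distribution, but the mode moves: log p(y) = -exp(y) + y peaks at
# y = 0, i.e. x = 1. This is why MAP estimation should drop the jacobian.
y = np.log(x)
print(np.exp(y[np.argmax(-np.exp(y) + y)]))   # ~ 1
```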


3 changes: 3 additions & 0 deletions pymc3/distributions/transforms.py
@@ -80,6 +80,9 @@ def logp(self, x):
return (self.dist.logp(self.transform_used.backward(x)) +
self.transform_used.jacobian_det(x))

def logp_nojac(self, x):
return self.dist.logp(self.transform_used.backward(x))

transform = Transform
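Note (not part of the diff): a standalone SciPy sketch of what the two methods above return when `transform_used` is the log transform (whose `backward` is `exp` and whose `jacobian_det(y)` equals `y`), using a standard half-normal as the underlying distribution.

```python
import numpy as np
from scipy import stats

y = 0.3                                  # value on the transformed (unbounded) space
x = np.exp(y)                            # transform_used.backward(y) for a log transform

logp_nojac = stats.halfnorm.logpdf(x)    # dist.logp(backward(y)), as in logp_nojac above
jacobian_det = y                         # log|d exp(y) / dy| = y
logp = logp_nojac + jacobian_det         # what logp above returns
print(logp_nojac, logp)
```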


76 changes: 70 additions & 6 deletions pymc3/model.py
@@ -169,6 +169,18 @@ def d2logp(self, vars=None):
"""Compiled log probability density hessian function"""
return self.model.fn(hessian(self.logpt, vars))

@property
def logp_nojac(self):
return self.model.fn(self.logp_nojact)

def dlogp_nojac(self, vars=None):
"""Compiled log density gradient function, without jacobian terms."""
return self.model.fn(gradient(self.logp_nojact, vars))

def d2logp_nojac(self, vars=None):
"""Compiled log density hessian function, without jacobian terms."""
return self.model.fn(hessian(self.logp_nojact, vars))

@property
def fastlogp(self):
"""Compiled log probability density function"""
@@ -182,13 +194,36 @@ def fastd2logp(self, vars=None):
"""Compiled log probability density hessian function"""
return self.model.fastfn(hessian(self.logpt, vars))

@property
def fastlogp_nojac(self):
return self.model.fastfn(self.logp_nojact)

def fastdlogp_nojac(self, vars=None):
"""Compiled log density gradient function, without jacobian terms."""
return self.model.fastfn(gradient(self.logp_nojact, vars))

def fastd2logp_nojac(self, vars=None):
"""Compiled log density hessian function, without jacobian terms."""
return self.model.fastfn(hessian(self.logp_nojact, vars))

@property
def logpt(self):
"""Theano scalar of log-probability of the model"""
if getattr(self, 'total_size', None) is not None:
- logp = tt.sum(self.logp_elemwiset) * self.scaling
+ logp = self.logp_sum_unscaledt * self.scaling
else:
logp = self.logp_sum_unscaledt
if self.name is not None:
logp.name = '__logp_%s' % self.name
return logp

@property
def logp_nojact(self):
"""Theano scalar of log-probability, excluding jacobian terms."""
if getattr(self, 'total_size', None) is not None:
logp = tt.sum(self.logp_nojac_unscaledt) * self.scaling
else:
- logp = tt.sum(self.logp_elemwiset)
+ logp = tt.sum(self.logp_nojac_unscaledt)
if self.name is not None:
logp.name = '__logp_%s' % self.name
return logp
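Note (not part of the diff): a hedged usage sketch of the tensors and compiled helpers defined above, assuming the pymc3 API of this era (`sd=` keyword, `model.test_point`); the variable name is illustrative.

```python
import pymc3 as pm

with pm.Model() as model:
    sd = pm.HalfNormal('sd', sd=1.0)     # sampled on the log scale internally

point = model.test_point
print(model.fastlogp(point))             # compiled from logpt: includes the log-jacobian
print(model.fastlogp_nojac(point))       # compiled from logp_nojact: excludes it
```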
@@ -623,9 +658,26 @@ def logp_dlogp_function(self, grad_vars=None, **kwargs):
def logpt(self):
"""Theano scalar of log-probability of the model"""
with self:
- factors = [var.logpt for var in self.basic_RVs] + self.potentials
- logp = tt.add(*map(tt.sum, factors))
- logp.name = '__logp'
+ factors = [var.logpt for var in self.basic_RVs]
+ logp_factors = tt.sum(factors)
+ logp_potentials = tt.sum([tt.sum(pot) for pot in self.potentials])
+ logp = logp_factors + logp_potentials
+ if self.name:
+ logp.name = '__logp_%s' % self.name
+ else:
+ logp.name = '__logp'
return logp

@property
def logp_nojact(self):
"""Theano scalar of log-probability of the model"""
with self:
factors = [var.logp_nojact for var in self.basic_RVs] + self.potentials
logp = tt.sum([tt.sum(factor) for factor in factors])
if self.name:
logp.name = '__logp_nojac_%s' % self.name
else:
logp.name = '__logp_nojac'
return logp

@property
@@ -634,7 +686,7 @@ def varlogpt(self):
(excluding deterministic)."""
with self:
factors = [var.logpt for var in self.vars]
- return tt.add(*map(tt.sum, factors))
+ return tt.sum(factors)

@property
def vars(self):
@@ -1066,6 +1118,10 @@ def __init__(self, type=None, owner=None, index=None, name=None,
self.tag.test_value = np.ones(
distribution.shape, distribution.dtype) * distribution.default()
self.logp_elemwiset = distribution.logp(self)
# The logp might need scaling in minibatches.
# This is done in `Factor`.
self.logp_sum_unscaledt = distribution.logp_sum(self)
self.logp_nojac_unscaledt = distribution.logp_nojac(self)
Review comment (Member): scaledt?

Reply (Member Author): The 't' stands for "tensor". I thought I'd use the same naming convention as in model.logpt vs model.logp.

self.total_size = total_size
self.model = model
self.scaling = _get_scaling(total_size, self.shape, self.ndim)
@@ -1169,6 +1225,10 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None,

self.missing_values = data.missing_values
self.logp_elemwiset = distribution.logp(data)
# The logp might need scaling in minibatches.
# This is done in `Factor`.
self.logp_sum_unscaledt = distribution.logp_sum(data)
self.logp_nojac_unscaledt = distribution.logp_nojac(data)
self.total_size = total_size
self.model = model
self.distribution = distribution
@@ -1220,6 +1280,10 @@ def __init__(self, name, data, distribution, total_size=None, model=None):
self.missing_values = [datum.missing_values for datum in self.data.values()
if datum.missing_values is not None]
self.logp_elemwiset = distribution.logp(**self.data)
# The logp might need scaling in minibatches.
# This is done in `Factor`.
self.logp_sum_unscaledt = distribution.logp_sum(**self.data)
self.logp_nojac_unscaledt = distribution.logp_nojac(**self.data)
self.total_size = total_size
self.model = model
self.distribution = distribution
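Note (not part of the diff): a small sketch of the scaling comments repeated above, assuming the standard pymc3 `total_size` minibatch interface; the data and sizes are illustrative.

```python
import numpy as np
import pymc3 as pm

batch = np.random.randn(100)             # a 100-point minibatch standing in for 1000 points

with pm.Model() as model:
    mu = pm.Normal('mu', mu=0.0, sd=10.0)
    obs = pm.Normal('obs', mu=mu, sd=1.0, observed=batch, total_size=1000)

# obs.logp_sum_unscaledt is the plain sum of the 100 elementwise logps;
# obs.scaling (roughly total_size / batch size = 10) multiplies that sum
# inside Factor.logpt, so the minibatch term approximates the full-data logp.
```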
10 changes: 6 additions & 4 deletions pymc3/tuning/starting.py
@@ -18,8 +18,10 @@

__all__ = ['find_MAP']


def find_MAP(start=None, vars=None, fmin=None,
- return_raw=False, model=None, live_disp=False, callback=None, *args, **kwargs):
+ return_raw=False, model=None, live_disp=False, callback=None,
+ *args, **kwargs):
"""
Sets state to the local maximum a posteriori point given a model.
Current default of fmin_Hessian does not deal well with optimizing close
@@ -69,7 +71,7 @@ def find_MAP(start=None, vars=None, fmin=None,
except AttributeError:
gradient_avail = False

- if disc_vars or not gradient_avail :
+ if disc_vars or not gradient_avail:
pm._log.warning("Warning: gradient not available." +
"(E.g. vars contains discrete variables). MAP " +
"estimates may not be accurate for the default " +
@@ -88,13 +90,13 @@ def find_MAP(start=None, vars=None, fmin=None,
start = Point(start, model=model)
bij = DictToArrayBijection(ArrayOrdering(vars), start)

- logp = bij.mapf(model.fastlogp)
+ logp = bij.mapf(model.fastlogp_nojac)
def logp_o(point):
return nan_to_high(-logp(point))

# Check to see if minimization function actually uses the gradient
if 'fprime' in getargspec(fmin).args:
- dlogp = bij.mapf(model.fastdlogp(vars))
+ dlogp = bij.mapf(model.fastdlogp_nojac(vars))
def grad_logp_o(point):
return nan_to_num(-dlogp(point))

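Note (not part of the diff): a hedged end-to-end sketch of the user-visible effect of the starting.py change, assuming the standard pymc3 API; `find_MAP` now optimizes via `fastlogp_nojac`/`fastdlogp_nojac`, so the result is the MAP of the model in its original parametrization. Data and names are illustrative.

```python
import numpy as np
import pymc3 as pm

data = np.random.randn(50)

with pm.Model() as model:
    sd = pm.HalfNormal('sd', sd=1.0)               # log-transformed internally
    pm.Normal('obs', mu=0.0, sd=sd, observed=data)
    map_estimate = pm.find_MAP()                   # uses the jacobian-free density

# Which keys the returned dict contains (transformed and/or original variables)
# depends on the pymc3 version.
print(map_estimate)
```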