diff --git a/pymc3/__init__.py b/pymc3/__init__.py index d2fe3bc39d..1214ec9739 100644 --- a/pymc3/__init__.py +++ b/pymc3/__init__.py @@ -5,7 +5,15 @@ from .distributions import * from .glm import * from . import gp -from .math import logaddexp, logsumexp, logit, invlogit, expand_packed_triangular, probit, invprobit +from .math import ( + logaddexp, + logsumexp, + logit, + invlogit, + expand_packed_triangular, + probit, + invprobit, +) from .model import * from .model_graph import model_to_graphviz from .stats import * @@ -28,7 +36,8 @@ from .data import * import logging -_log = logging.getLogger('pymc3') + +_log = logging.getLogger("pymc3") if not logging.root.handlers: _log.setLevel(logging.INFO) handler = logging.StreamHandler() diff --git a/pymc3/backends/__init__.py b/pymc3/backends/__init__.py index 95a179b780..d3b9561df5 100644 --- a/pymc3/backends/__init__.py +++ b/pymc3/backends/__init__.py @@ -122,9 +122,8 @@ from ..backends.sqlite import SQLite from ..backends.hdf5 import HDF5 -_shortcuts = {'text': {'backend': Text, - 'name': 'mcmc'}, - 'sqlite': {'backend': SQLite, - 'name': 'mcmc.sqlite'}, - 'hdf5': {'backend': HDF5, - 'name': 'mcmc.hdf5'}} +_shortcuts = { + "text": {"backend": Text, "name": "mcmc"}, + "sqlite": {"backend": SQLite, "name": "mcmc.sqlite"}, + "hdf5": {"backend": HDF5, "name": "mcmc.hdf5"}, +} diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 1efb55a3e2..d08673f62e 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -13,7 +13,7 @@ from ..model import modelcontext from .report import SamplerReport, merge_reports -logger = logging.getLogger('pymc3') +logger = logging.getLogger("pymc3") class BackendError(Exception): @@ -58,10 +58,8 @@ def __init__(self, name, model=None, vars=None, test_point=None): test_point_.update(test_point) test_point = test_point_ var_values = list(zip(self.varnames, self.fn(test_point))) - self.var_shapes = {var: value.shape - for var, value in var_values} - self.var_dtypes = {var: value.dtype - for var, value in var_values} + self.var_shapes = {var: value.shape for var, value in var_values} + self.var_dtypes = {var: value.dtype for var, value in var_values} self.chain = None self._is_base_setup = False self.sampler_vars = None @@ -87,8 +85,9 @@ def _set_sampler_vars(self, sampler_vars): for stats in sampler_vars: for key, dtype in stats.items(): if dtypes.setdefault(key, dtype) != dtype: - raise ValueError("Sampler statistic %s appears with " - "different types." % key) + raise ValueError( + "Sampler statistic %s appears with " "different types." % key + ) self.sampler_vars = sampler_vars @@ -137,7 +136,7 @@ def __getitem__(self, idx): try: return self.point(int(idx)) except (ValueError, TypeError): # Passed variable or variable name. - raise ValueError('Can only index with slice or integer') + raise ValueError("Can only index with slice or integer") def __len__(self): raise NotImplementedError @@ -181,13 +180,14 @@ def get_sampler_stats(self, varname, sampler_idx=None, burn=0, thin=1): if sampler_idx is not None: return self._get_sampler_stats(varname, sampler_idx, burn, thin) - sampler_idxs = [i for i, s in enumerate(self.sampler_vars) - if varname in s] + sampler_idxs = [i for i, s in enumerate(self.sampler_vars) if varname in s] if not sampler_idxs: raise KeyError("Unknown sampler stat %s" % varname) - vals = np.stack([self._get_sampler_stats(varname, i, burn, thin) - for i in sampler_idxs], axis=-1) + vals = np.stack( + [self._get_sampler_stats(varname, i, burn, thin) for i in sampler_idxs], + axis=-1, + ) if vals.shape[-1] == 1: return vals[..., 0] else: @@ -267,13 +267,14 @@ def __init__(self, straces): self._report = SamplerReport() for strace in straces: - if hasattr(strace, '_warnings'): + if hasattr(strace, "_warnings"): self._report._add_warnings(strace._warnings, strace.chain) def __repr__(self): - template = '<{}: {} chains, {} iterations, {} variables>' - return template.format(self.__class__.__name__, - self.nchains, len(self), len(self.varnames)) + template = "<{}: {} chains, {} iterations, {} variables>" + return template.format( + self.__class__.__name__, self.nchains, len(self), len(self.varnames) + ) @property def nchains(self): @@ -310,16 +311,26 @@ def __getitem__(self, idx): var = str(var) if var in self.varnames: if var in self.stat_names: - warnings.warn("Attribute access on a trace object is ambigous. " - "Sampler statistic and model variable share a name. Use " - "trace.get_values or trace.get_sampler_stats.") + warnings.warn( + "Attribute access on a trace object is ambigous. " + "Sampler statistic and model variable share a name. Use " + "trace.get_values or trace.get_sampler_stats." + ) return self.get_values(var, burn=burn, thin=thin) if var in self.stat_names: return self.get_sampler_stats(var, burn=burn, thin=thin) raise KeyError("Unknown variable %s" % var) - _attrs = set(['_straces', 'varnames', 'chains', 'stat_names', - 'supports_sampler_stats', '_report']) + _attrs = set( + [ + "_straces", + "varnames", + "chains", + "stat_names", + "supports_sampler_stats", + "_report", + ] + ) def __getattr__(self, name): # Avoid infinite recursion when called before __init__ @@ -330,14 +341,17 @@ def __getattr__(self, name): name = str(name) if name in self.varnames: if name in self.stat_names: - warnings.warn("Attribute access on a trace object is ambigous. " - "Sampler statistic and model variable share a name. Use " - "trace.get_values or trace.get_sampler_stats.") + warnings.warn( + "Attribute access on a trace object is ambigous. " + "Sampler statistic and model variable share a name. Use " + "trace.get_values or trace.get_sampler_stats." + ) return self.get_values(name) if name in self.stat_names: return self.get_sampler_stats(name) - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, name)) + raise AttributeError( + "'{}' object has no attribute '{}'".format(type(self).__name__, name) + ) def __len__(self): chain = self.chains[-1] @@ -392,10 +406,12 @@ def add_values(self, vals, overwrite=False): l_samples = len(self) * len(self.chains) l_v = len(v) if l_v != l_samples: - warnings.warn("The length of the values you are trying to " - "add ({}) does not match the number ({}) of " - "total samples in the trace " - "(chains * iterations)".format(l_v, l_samples)) + warnings.warn( + "The length of the values you are trying to " + "add ({}) does not match the number ({}) of " + "total samples in the trace " + "(chains * iterations)".format(l_v, l_samples) + ) v = np.squeeze(v.reshape(len(chains), len(self), -1)) @@ -424,8 +440,9 @@ def remove_values(self, name): chain.vars.remove(va) del chain.samples[name] - def get_values(self, varname, burn=0, thin=1, combine=True, chains=None, - squeeze=True): + def get_values( + self, varname, burn=0, thin=1, combine=True, chains=None, squeeze=True + ): """Get values from traces. Parameters @@ -452,14 +469,16 @@ def get_values(self, varname, burn=0, thin=1, combine=True, chains=None, chains = self.chains varname = str(varname) try: - results = [self._straces[chain].get_values(varname, burn, thin) - for chain in chains] + results = [ + self._straces[chain].get_values(varname, burn, thin) for chain in chains + ] except TypeError: # Single chain passed. results = [self._straces[chains].get_values(varname, burn, thin)] return _squeeze_cat(results, combine, squeeze) - def get_sampler_stats(self, varname, burn=0, thin=1, combine=True, - chains=None, squeeze=True): + def get_sampler_stats( + self, varname, burn=0, thin=1, combine=True, chains=None, squeeze=True + ): """Get sampler statistics from the trace. Parameters @@ -487,8 +506,10 @@ def get_sampler_stats(self, varname, burn=0, thin=1, combine=True, except TypeError: chains = [chains] - results = [self._straces[chain].get_sampler_stats(varname, None, burn, thin) - for chain in chains] + results = [ + self._straces[chain].get_sampler_stats(varname, None, burn, thin) + for chain in chains + ] return _squeeze_cat(results, combine, squeeze) def _slice(self, slice): diff --git a/pymc3/backends/hdf5.py b/pymc3/backends/hdf5.py index 3d800bf199..5e732bbb54 100644 --- a/pymc3/backends/hdf5.py +++ b/pymc3/backends/hdf5.py @@ -2,6 +2,7 @@ import h5py from contextlib import contextmanager + @contextmanager def activator(instance): if isinstance(instance.hdf5_file, h5py.File): @@ -9,7 +10,7 @@ def activator(instance): yield return # if file is closed/not referenced: open, do job, then close - instance.hdf5_file = h5py.File(instance.name, 'a') + instance.hdf5_file = h5py.File(instance.name, "a") yield instance.hdf5_file.close() return @@ -50,21 +51,21 @@ def activate_file(self): @property def samples(self): g = self.hdf5_file.require_group(str(self.chain)) - if 'name' not in g.attrs: - g.attrs['name'] = self.chain - return g.require_group('samples') + if "name" not in g.attrs: + g.attrs["name"] = self.chain + return g.require_group("samples") @property def stats(self): g = self.hdf5_file.require_group(str(self.chain)) - if 'name' not in g.attrs: - g.attrs['name'] = self.chain - return g.require_group('stats') + if "name" not in g.attrs: + g.attrs["name"] = self.chain + return g.require_group("stats") @property def chains(self): with self.activate_file: - return [v.attrs['name'] for v in self.hdf5_file.values()] + return [v.attrs["name"] for v in self.hdf5_file.values()] @property def is_new_file(self): @@ -84,19 +85,19 @@ def nchains(self): @property def records_stats(self): with self.activate_file: - return self.hdf5_file.attrs['records_stats'] + return self.hdf5_file.attrs["records_stats"] @records_stats.setter def records_stats(self, v): with self.activate_file: - self.hdf5_file.attrs['records_stats'] = bool(v) + self.hdf5_file.attrs["records_stats"] = bool(v) def _resize(self, n): for v in self.samples.values(): v.resize(n, axis=0) for key, group in self.stats.items(): for statds in group.values(): - statds.resize((n, )) + statds.resize((n,)) @property def sampler_vars(self): @@ -123,10 +124,15 @@ def sampler_vars(self, values): if not data.keys(): # no pre-recorded stats for varname, dtype in sampler.items(): if varname not in data: - data.create_dataset(varname, (self.draws,), dtype=dtype, maxshape=(None,)) + data.create_dataset( + varname, (self.draws,), dtype=dtype, maxshape=(None,) + ) elif data.keys() != sampler.keys(): raise ValueError( - "Sampler vars can't change, names incompatible: {} != {}".format(data.keys(), sampler.keys())) + "Sampler vars can't change, names incompatible: {} != {}".format( + data.keys(), sampler.keys() + ) + ) self.records_stats = True def setup(self, draws, chain, sampler_vars=None): @@ -146,16 +152,18 @@ def setup(self, draws, chain, sampler_vars=None): with self.activate_file: for varname, shape in self.var_shapes.items(): if varname not in self.samples: - self.samples.create_dataset(name=varname, shape=(draws, ) + shape, - dtype=self.var_dtypes[varname], - maxshape=(None, ) + shape) + self.samples.create_dataset( + name=varname, + shape=(draws,) + shape, + dtype=self.var_dtypes[varname], + maxshape=(None,) + shape, + ) self.draw_idx = len(self) self.draws = self.draw_idx + draws self._set_sampler_vars(sampler_vars) self._is_base_setup = True self._resize(self.draws) - def close(self): with self.activate_file: if self.draw_idx == self.draws: @@ -190,8 +198,9 @@ def _slice(self, idx): start, stop, step = idx.indices(len(self)) sliced = ndarray.NDArray(model=self.model, vars=self.vars) sliced.chain = self.chain - sliced.samples = {v: self.samples[v][start:stop:step] - for v in self.varnames} + sliced.samples = { + v: self.samples[v][start:stop:step] for v in self.varnames + } sliced.draw_idx = (stop - start) // step return sliced diff --git a/pymc3/backends/ndarray.py b/pymc3/backends/ndarray.py index 1c57bb02dc..9c9610cbcc 100644 --- a/pymc3/backends/ndarray.py +++ b/pymc3/backends/ndarray.py @@ -35,7 +35,7 @@ def save_trace(trace, directory=None, overwrite=False): str, path to the directory where the trace was saved """ if directory is None: - directory = '.pymc_{}.trace' + directory = ".pymc_{}.trace" idx = 1 while os.path.exists(directory.format(idx)): idx += 1 @@ -45,8 +45,10 @@ def save_trace(trace, directory=None, overwrite=False): if overwrite: shutil.rmtree(directory) else: - raise OSError('Cautiously refusing to overwrite the already existing {}! Please supply ' - 'a different directory, or set `overwrite=True`'.format(directory)) + raise OSError( + "Cautiously refusing to overwrite the already existing {}! Please supply " + "a different directory, or set `overwrite=True`".format(directory) + ) os.makedirs(directory) for chain, ndarray in trace._straces.items(): @@ -72,15 +74,15 @@ def load_trace(directory, model=None): pm.Multitrace that was saved in the directory """ straces = [] - for directory in glob.glob(os.path.join(directory, '*')): + for directory in glob.glob(os.path.join(directory, "*")): if os.path.isdir(directory): straces.append(SerializeNDArray(directory).load(model)) return base.MultiTrace(straces) class SerializeNDArray(object): - metadata_file = 'metadata.json' - samples_file = 'samples.npz' + metadata_file = "metadata.json" + samples_file = "samples.npz" def __init__(self, directory): """Helper to save and load NDArray objects""" @@ -99,10 +101,10 @@ def to_metadata(ndarray): stats.append({key: value.tolist() for key, value in stat.items()}) metadata = { - 'draw_idx': ndarray.draw_idx, - 'draws': ndarray.draws, - '_stats': stats, - 'chain': ndarray.chain, + "draw_idx": ndarray.draw_idx, + "draws": ndarray.draws, + "_stats": stats, + "chain": ndarray.chain, } return metadata @@ -114,14 +116,14 @@ def save(self, ndarray): to reload the multitrace. """ if not isinstance(ndarray, NDArray): - raise TypeError('Can only save NDArray') + raise TypeError("Can only save NDArray") if os.path.isdir(self.directory): shutil.rmtree(self.directory) os.mkdir(self.directory) - with open(self.metadata_path, 'w') as buff: + with open(self.metadata_path, "w") as buff: json.dump(SerializeNDArray.to_metadata(ndarray), buff) np.savez_compressed(self.samples_path, **ndarray.samples) @@ -129,10 +131,12 @@ def save(self, ndarray): def load(self, model): """Load the saved ndarray from file""" new_trace = NDArray(model=model) - with open(self.metadata_path, 'r') as buff: + with open(self.metadata_path, "r") as buff: metadata = json.load(buff) - metadata['_stats'] = [{k: np.array(v) for k, v in stat.items()} for stat in metadata['_stats']] + metadata["_stats"] = [ + {k: np.array(v) for k, v in stat.items()} for stat in metadata["_stats"] + ] for key, value in metadata.items(): setattr(new_trace, key, value) @@ -187,16 +191,16 @@ def setup(self, draws, chain, sampler_vars=None): self.draw_idx = old_draws for varname, shape in self.var_shapes.items(): old_var_samples = self.samples[varname] - new_var_samples = np.zeros((draws, ) + shape, - self.var_dtypes[varname]) - self.samples[varname] = np.concatenate((old_var_samples, - new_var_samples), - axis=0) + new_var_samples = np.zeros((draws,) + shape, self.var_dtypes[varname]) + self.samples[varname] = np.concatenate( + (old_var_samples, new_var_samples), axis=0 + ) else: # Otherwise, make array of zeros for each variable. self.draws = draws for varname, shape in self.var_shapes.items(): - self.samples[varname] = np.zeros((draws, ) + shape, - dtype=self.var_dtypes[varname]) + self.samples[varname] = np.zeros( + (draws,) + shape, dtype=self.var_dtypes[varname] + ) if sampler_vars is None: return @@ -247,12 +251,14 @@ def close(self): return # Remove trailing zeros if interrupted before completed all # draws. - self.samples = {var: vtrace[:self.draw_idx] - for var, vtrace in self.samples.items()} + self.samples = { + var: vtrace[: self.draw_idx] for var, vtrace in self.samples.items() + } if self._stats is not None: self._stats = [ - {var: trace[:self.draw_idx] for var, trace in stats.items()} - for stats in self._stats] + {var: trace[: self.draw_idx] for var, trace in stats.items()} + for stats in self._stats + ] # Selection methods @@ -286,8 +292,9 @@ def _slice(self, idx): sliced = NDArray(model=self.model, vars=self.vars) sliced.chain = self.chain - sliced.samples = {varname: values[idx] - for varname, values in self.samples.items()} + sliced.samples = { + varname: values[idx] for varname, values in self.samples.items() + } sliced.sampler_vars = self.sampler_vars sliced.draw_idx = (idx.stop - idx.start) // idx.step @@ -307,8 +314,7 @@ def point(self, idx): with variable names as keys. """ idx = int(idx) - return {varname: values[idx] - for varname, values in self.samples.items()} + return {varname: values[idx] for varname, values in self.samples.items()} def _slice_as_ndarray(strace, idx): @@ -316,16 +322,20 @@ def _slice_as_ndarray(strace, idx): sliced.chain = strace.chain # Happy path where we do not need to load everything from the trace - if ((idx.step is None or idx.step >= 1) and - (idx.stop is None or idx.stop == len(strace))): + if (idx.step is None or idx.step >= 1) and ( + idx.stop is None or idx.stop == len(strace) + ): start, stop, step = idx.indices(len(strace)) - sliced.samples = {v: strace.get_values(v, burn=idx.start, thin=idx.step) - for v in strace.varnames} + sliced.samples = { + v: strace.get_values(v, burn=idx.start, thin=idx.step) + for v in strace.varnames + } sliced.draw_idx = (stop - start) // step else: start, stop, step = idx.indices(len(strace)) - sliced.samples = {v: strace.get_values(v)[start:stop:step] - for v in strace.varnames} + sliced.samples = { + v: strace.get_values(v)[start:stop:step] for v in strace.varnames + } sliced.draw_idx = (stop - start) // step return sliced diff --git a/pymc3/backends/report.py b/pymc3/backends/report.py index f2b81d8761..77ff1f3425 100644 --- a/pymc3/backends/report.py +++ b/pymc3/backends/report.py @@ -4,7 +4,7 @@ from ..util import is_transformed_name, get_untransformed_name -logger = logging.getLogger('pymc3') +logger = logging.getLogger("pymc3") @enum.unique @@ -22,16 +22,16 @@ class WarningType(enum.Enum): SamplerWarning = namedtuple( - 'SamplerWarning', - "kind, message, level, step, exec_info, extra") + "SamplerWarning", "kind, message, level, step, exec_info, extra" +) _LEVELS = { - 'info': logging.INFO, - 'error': logging.ERROR, - 'warn': logging.WARN, - 'debug': logging.DEBUG, - 'critical': logging.CRITICAL, + "info": logging.INFO, + "error": logging.ERROR, + "warn": logging.WARN, + "debug": logging.DEBUG, + "critical": logging.CRITICAL, } @@ -50,21 +50,22 @@ def _warnings(self): @property def ok(self): """Whether the automatic convergence checks found serious problems.""" - return all(_LEVELS[warn.level] < _LEVELS['warn'] - for warn in self._warnings) + return all(_LEVELS[warn.level] < _LEVELS["warn"] for warn in self._warnings) - def raise_ok(self, level='error'): - errors = [warn for warn in self._warnings - if _LEVELS[warn.level] >= _LEVELS[level]] + def raise_ok(self, level="error"): + errors = [ + warn for warn in self._warnings if _LEVELS[warn.level] >= _LEVELS[level] + ] if errors: - raise ValueError('Serious convergence issues during sampling.') + raise ValueError("Serious convergence issues during sampling.") def _run_convergence_checks(self, trace, model): if trace.nchains == 1: - msg = ("Only one chain was sampled, this makes it impossible to " - "run some convergence checks") - warn = SamplerWarning(WarningType.BAD_PARAMS, msg, 'info', - None, None, None) + msg = ( + "Only one chain was sampled, this makes it impossible to " + "run some convergence checks" + ) + warn = SamplerWarning(WarningType.BAD_PARAMS, msg, "info", None, None, None) self._add_warnings([warn]) return @@ -86,44 +87,61 @@ def _run_convergence_checks(self, trace, model): warnings = [] rhat_max = max(val.max() for val in gelman_rubin.values()) if rhat_max > 1.4: - msg = ("The gelman-rubin statistic is larger than 1.4 for some " - "parameters. The sampler did not converge.") + msg = ( + "The gelman-rubin statistic is larger than 1.4 for some " + "parameters. The sampler did not converge." + ) warn = SamplerWarning( - WarningType.CONVERGENCE, msg, 'error', None, None, gelman_rubin) + WarningType.CONVERGENCE, msg, "error", None, None, gelman_rubin + ) warnings.append(warn) elif rhat_max > 1.2: - msg = ("The gelman-rubin statistic is larger than 1.2 for some " - "parameters.") + msg = ( + "The gelman-rubin statistic is larger than 1.2 for some " "parameters." + ) warn = SamplerWarning( - WarningType.CONVERGENCE, msg, 'warn', None, None, gelman_rubin) + WarningType.CONVERGENCE, msg, "warn", None, None, gelman_rubin + ) warnings.append(warn) elif rhat_max > 1.05: - msg = ("The gelman-rubin statistic is larger than 1.05 for some " - "parameters. This indicates slight problems during " - "sampling.") + msg = ( + "The gelman-rubin statistic is larger than 1.05 for some " + "parameters. This indicates slight problems during " + "sampling." + ) warn = SamplerWarning( - WarningType.CONVERGENCE, msg, 'info', None, None, gelman_rubin) + WarningType.CONVERGENCE, msg, "info", None, None, gelman_rubin + ) warnings.append(warn) eff_min = min(val.min() for val in effective_n.values()) n_samples = len(trace) * trace.nchains if eff_min < 200 and n_samples >= 500: - msg = ("The estimated number of effective samples is smaller than " - "200 for some parameters.") + msg = ( + "The estimated number of effective samples is smaller than " + "200 for some parameters." + ) warn = SamplerWarning( - WarningType.CONVERGENCE, msg, 'error', None, None, effective_n) + WarningType.CONVERGENCE, msg, "error", None, None, effective_n + ) warnings.append(warn) elif eff_min / n_samples < 0.1: - msg = ("The number of effective samples is smaller than " - "10% for some parameters.") + msg = ( + "The number of effective samples is smaller than " + "10% for some parameters." + ) warn = SamplerWarning( - WarningType.CONVERGENCE, msg, 'warn', None, None, effective_n) + WarningType.CONVERGENCE, msg, "warn", None, None, effective_n + ) warnings.append(warn) elif eff_min / n_samples < 0.25: - msg = ("The number of effective samples is smaller than " - "25% for some parameters.") + msg = ( + "The number of effective samples is smaller than " + "25% for some parameters." + ) warn = SamplerWarning( - WarningType.CONVERGENCE, msg, 'info', None, None, effective_n) + WarningType.CONVERGENCE, msg, "info", None, None, effective_n + ) warnings.append(warn) self._add_warnings(warnings) @@ -136,7 +154,6 @@ def _add_warnings(self, warnings, chain=None): warn_list.extend(warnings) def _log_summary(self): - def log_warning(warn): level = _LEVELS[warn.level] logger.log(level, warn.message) @@ -155,17 +172,14 @@ def filter_warns(warnings): for warn in warnings: if warn.step is None: filtered.append(warn) - elif (start <= warn.step < stop and - (warn.step - start) % step == 0): + elif start <= warn.step < stop and (warn.step - start) % step == 0: warn = warn._replace(step=warn.step - start) filtered.append(warn) return filtered report._add_warnings(filter_warns(self._global_warnings)) for chain in self._chain_warnings: - report._add_warnings( - filter_warns(self._chain_warnings[chain]), - chain) + report._add_warnings(filter_warns(self._chain_warnings[chain]), chain) return report diff --git a/pymc3/backends/sqlite.py b/pymc3/backends/sqlite.py index e0f0ff74a0..7c8b93981c 100644 --- a/pymc3/backends/sqlite.py +++ b/pymc3/backends/sqlite.py @@ -23,35 +23,42 @@ from . import tracetab as ttab TEMPLATES = { - 'table': ('CREATE TABLE IF NOT EXISTS [{table}] ' - '(recid INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, ' - 'draw INTEGER, chain INT(5), ' - '{value_cols})'), - 'insert': ('INSERT INTO [{table}] ' - '(recid, draw, chain, {value_cols}) ' - 'VALUES (NULL, ?, ?, {values})'), - 'max_draw': ('SELECT MAX(draw) FROM [{table}] ' - 'WHERE chain = ?'), - 'draw_count': ('SELECT COUNT(*) FROM [{table}] ' - 'WHERE chain = ?'), + "table": ( + "CREATE TABLE IF NOT EXISTS [{table}] " + "(recid INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, " + "draw INTEGER, chain INT(5), " + "{value_cols})" + ), + "insert": ( + "INSERT INTO [{table}] " + "(recid, draw, chain, {value_cols}) " + "VALUES (NULL, ?, ?, {values})" + ), + "max_draw": ("SELECT MAX(draw) FROM [{table}] " "WHERE chain = ?"), + "draw_count": ("SELECT COUNT(*) FROM [{table}] " "WHERE chain = ?"), # Named placeholders are used in the selection templates because # some values occur more than once in the same template. - 'select': ('SELECT * FROM [{table}] ' - 'WHERE (chain = :chain)'), - 'select_burn': ('SELECT * FROM [{table}] ' - 'WHERE (chain = :chain) AND (draw > :burn)'), - 'select_thin': ('SELECT * FROM [{table}] ' - 'WHERE (chain = :chain) AND ' - '(draw - (SELECT draw FROM [{table}] ' - 'WHERE chain = :chain ' - 'ORDER BY draw LIMIT 1)) % :thin = 0'), - 'select_burn_thin': ('SELECT * FROM [{table}] ' - 'WHERE (chain = :chain) AND (draw > :burn) ' - 'AND (draw - (SELECT draw FROM [{table}] ' - 'WHERE (chain = :chain) AND (draw > :burn) ' - 'ORDER BY draw LIMIT 1)) % :thin = 0'), - 'select_point': ('SELECT * FROM [{table}] ' - 'WHERE (chain = :chain) AND (draw = :draw)'), + "select": ("SELECT * FROM [{table}] " "WHERE (chain = :chain)"), + "select_burn": ( + "SELECT * FROM [{table}] " "WHERE (chain = :chain) AND (draw > :burn)" + ), + "select_thin": ( + "SELECT * FROM [{table}] " + "WHERE (chain = :chain) AND " + "(draw - (SELECT draw FROM [{table}] " + "WHERE chain = :chain " + "ORDER BY draw LIMIT 1)) % :thin = 0" + ), + "select_burn_thin": ( + "SELECT * FROM [{table}] " + "WHERE (chain = :chain) AND (draw > :burn) " + "AND (draw - (SELECT draw FROM [{table}] " + "WHERE (chain = :chain) AND (draw > :burn) " + "ORDER BY draw LIMIT 1)) % :thin = 0" + ), + "select_point": ( + "SELECT * FROM [{table}] " "WHERE (chain = :chain) AND (draw = :draw)" + ), } sqlite3.register_adapter(np.int32, int) @@ -108,35 +115,36 @@ def setup(self, draws, chain): self.draw_idx = self._get_max_draw(chain) + 1 self._len = None else: # Table has not been created. - self._var_cols = {varname: ttab.create_flat_names('v', shape) - for varname, shape in self.var_shapes.items()} + self._var_cols = { + varname: ttab.create_flat_names("v", shape) + for varname, shape in self.var_shapes.items() + } self._create_table() self._is_setup = True self._create_insert_queries() self._closed = False def _create_table(self): - template = TEMPLATES['table'] + template = TEMPLATES["table"] with self.db.con: for varname, var_cols in self._var_cols.items(): if np.issubdtype(self.var_dtypes[varname], np.integer): - dtype = 'INT' + dtype = "INT" else: - dtype = 'FLOAT' - colnames = ', '.join([v + ' ' + dtype for v in var_cols]) - statement = template.format(table=varname, - value_cols=colnames) + dtype = "FLOAT" + colnames = ", ".join([v + " " + dtype for v in var_cols]) + statement = template.format(table=varname, value_cols=colnames) self.db.cursor.execute(statement) def _create_insert_queries(self): - template = TEMPLATES['insert'] + template = TEMPLATES["insert"] for varname, var_cols in self._var_cols.items(): # Create insert statement for each variable. - var_str = ', '.join(var_cols) - placeholders = ', '.join(['?'] * len(var_cols)) - statement = template.format(table=varname, - value_cols=var_str, - values=placeholders) + var_str = ", ".join(var_cols) + placeholders = ", ".join(["?"] * len(var_cols)) + statement = template.format( + table=varname, value_cols=var_str, values=placeholders + ) self.var_inserts[varname] = statement def record(self, point): @@ -160,8 +168,9 @@ def _execute_queue(self): for varname in self.varnames: if not self._queue[varname]: continue - self.db.cursor.executemany(self.var_inserts[varname], - self._queue[varname]) + self.db.cursor.executemany( + self.var_inserts[varname], self._queue[varname] + ) self._queue[varname] = [] def close(self): @@ -182,7 +191,7 @@ def __len__(self): def _get_number_draws(self): self.db.connect() - statement = TEMPLATES['draw_count'].format(table=self.varnames[0]) + statement = TEMPLATES["draw_count"].format(table=self.varnames[0]) self.db.cursor.execute(statement, (self.chain,)) counts = self.db.cursor.fetchall()[0][0] if counts is None: @@ -192,8 +201,8 @@ def _get_number_draws(self): def _get_max_draw(self, chain): self.db.connect() - statement = TEMPLATES['max_draw'].format(table=self.varnames[0]) - self.db.cursor.execute(statement, (chain, )) + statement = TEMPLATES["max_draw"].format(table=self.varnames[0]) + self.db.cursor.execute(statement, (chain,)) counts = self.db.cursor.fetchall()[0][0] if counts is None: return 0 @@ -221,23 +230,24 @@ def get_values(self, varname, burn=0, thin=1): if burn < 0: burn = max(0, len(self) + burn) if thin < 1: - raise ValueError('Only positive thin values are supported ' - 'in SQLite backend.') + raise ValueError( + "Only positive thin values are supported " "in SQLite backend." + ) varname = str(varname) - statement_args = {'chain': self.chain} + statement_args = {"chain": self.chain} if burn == 0 and thin == 1: - action = 'select' + action = "select" elif thin == 1: - action = 'select_burn' - statement_args['burn'] = burn - 1 + action = "select_burn" + statement_args["burn"] = burn - 1 elif burn == 0: - action = 'select_thin' - statement_args['thin'] = thin + action = "select_thin" + statement_args["thin"] = thin else: - action = 'select_burn_thin' - statement_args['burn'] = burn - 1 - statement_args['thin'] = thin + action = "select_burn_thin" + statement_args["burn"] = burn - 1 + statement_args["thin"] = thin self.db.connect() shape = (-1,) + self.var_shapes[varname] @@ -248,7 +258,7 @@ def get_values(self, varname, burn=0, thin=1): def _slice(self, idx): if idx.stop is not None: - raise ValueError('Stop value in slice not supported.') + raise ValueError("Stop value in slice not supported.") return ndarray._slice_as_ndarray(self, idx) def point(self, idx): @@ -258,20 +268,18 @@ def point(self, idx): idx = int(idx) if idx < 0: idx = self._get_max_draw(self.chain) + idx + 1 - statement = TEMPLATES['select_point'] + statement = TEMPLATES["select_point"] self.db.connect() var_values = {} - statement_args = {'chain': self.chain, 'draw': idx} + statement_args = {"chain": self.chain, "draw": idx} for varname in self.varnames: - self.db.cursor.execute(statement.format(table=varname), - statement_args) + self.db.cursor.execute(statement.format(table=varname), statement_args) values = _rows_to_ndarray(self.db.cursor) var_values[varname] = values.reshape(self.var_shapes[varname]) return var_values class _SQLiteDB(object): - def __init__(self, name): self.name = name self.con = None @@ -312,16 +320,17 @@ def load(name, model=None): db.connect() varnames = _get_table_list(db.cursor) if len(varnames) == 0: - raise ValueError(('Can not get variable list for database' - '`{}`'.format(name))) + raise ValueError(("Can not get variable list for database" "`{}`".format(name))) chains = _get_chain_list(db.cursor, varnames[0]) straces = [] for chain in chains: strace = SQLite(name, model=model) strace.chain = chain - strace._var_cols = {varname: ttab.create_flat_names('v', shape) - for varname, shape in strace.var_shapes.items()} + strace._var_cols = { + varname: ttab.create_flat_names("v", shape) + for varname, shape in strace.var_shapes.items() + } strace._is_setup = True strace.db = db # Share the db with all traces. straces.append(strace) @@ -332,21 +341,23 @@ def _get_table_list(cursor): """Return a list of table names in the current database.""" # Modified from Django. Skips the sqlite_sequence system table used # for autoincrement key generation. - cursor.execute("SELECT name FROM sqlite_master " - "WHERE type='table' AND NOT name='sqlite_sequence' " - "ORDER BY name") + cursor.execute( + "SELECT name FROM sqlite_master " + "WHERE type='table' AND NOT name='sqlite_sequence' " + "ORDER BY name" + ) return [row[0] for row in cursor.fetchall()] def _get_var_strs(cursor, varname): - cursor.execute('SELECT * FROM [{}]'.format(varname)) + cursor.execute("SELECT * FROM [{}]".format(varname)) col_names = (col_descr[0] for col_descr in cursor.description) - return [name for name in col_names if name.startswith('v')] + return [name for name in col_names if name.startswith("v")] def _get_chain_list(cursor, varname): """Return a list of sorted chains for `varname`.""" - cursor.execute('SELECT DISTINCT chain FROM [{}]'.format(varname)) + cursor.execute("SELECT DISTINCT chain FROM [{}]".format(varname)) chains = [chain[0] for chain in cursor.fetchall()] chains.sort() return chains diff --git a/pymc3/backends/text.py b/pymc3/backends/text.py index 027a748e31..b9b5de373a 100644 --- a/pymc3/backends/text.py +++ b/pymc3/backends/text.py @@ -45,8 +45,9 @@ def __init__(self, name, model=None, vars=None, test_point=None): os.mkdir(name) super(Text, self).__init__(name, model, vars, test_point) - self.flat_names = {v: ttab.create_flat_names(v, shape) - for v, shape in self.var_shapes.items()} + self.flat_names = { + v: ttab.create_flat_names(v, shape) for v, shape in self.var_shapes.items() + } self.filename = None self._fh = None @@ -68,21 +69,22 @@ def setup(self, draws, chain): self._fh.close() self.chain = chain - self.filename = os.path.join(self.name, 'chain-{}.csv'.format(chain)) + self.filename = os.path.join(self.name, "chain-{}.csv".format(chain)) cnames = [fv for v in self.varnames for fv in self.flat_names[v]] if os.path.exists(self.filename): with open(self.filename) as fh: - prev_cnames = next(fh).strip().split(',') + prev_cnames = next(fh).strip().split(",") if prev_cnames != cnames: raise base.BackendError( "Previous file '{}' has different variables names " - "than current model.".format(self.filename)) - self._fh = open(self.filename, 'a') + "than current model.".format(self.filename) + ) + self._fh = open(self.filename, "a") else: - self._fh = open(self.filename, 'w') - self._fh.write(','.join(cnames) + '\n') + self._fh = open(self.filename, "w") + self._fh.write(",".join(cnames) + "\n") def record(self, point): """Record results of a sampling iteration. @@ -96,7 +98,7 @@ def record(self, point): for varname, value in zip(self.varnames, self.fn(point)): vals[varname] = value.ravel() columns = [str(val) for var in self.varnames for val in vals[var]] - self._fh.write(','.join(columns) + '\n') + self._fh.write(",".join(columns) + "\n") def close(self): if self._fh is not None: @@ -112,7 +114,6 @@ def _load_df(self): if "float" in str(dtype): self.df[key] = floatX(self.df[key]) - def __len__(self): if self.filename is None: return 0 @@ -140,7 +141,7 @@ def get_values(self, varname, burn=0, thin=1): def _slice(self, idx): if idx.stop is not None: - raise ValueError('Stop value in slice not supported.') + raise ValueError("Stop value in slice not supported.") return ndarray._slice_as_ndarray(self, idx) def point(self, idx): @@ -170,14 +171,14 @@ def load(name, model=None): ------- A MultiTrace instance """ - files = glob(os.path.join(name, 'chain-*.csv')) + files = glob(os.path.join(name, "chain-*.csv")) if len(files) == 0: - raise ValueError('No files present in directory {}'.format(name)) + raise ValueError("No files present in directory {}".format(name)) straces = [] for f in files: - chain = int(os.path.splitext(f)[0].rsplit('-', 1)[1]) + chain = int(os.path.splitext(f)[0].rsplit("-", 1)[1]) strace = Text(name, model=model) strace.chain = chain strace.filename = f @@ -203,7 +204,6 @@ def dump(name, trace, chains=None): chains = trace.chains for chain in chains: - filename = os.path.join(name, 'chain-{}.csv'.format(chain)) - df = ttab.trace_to_dataframe( - trace, chains=chain, include_transformed=True) + filename = os.path.join(name, "chain-{}.csv".format(chain)) + df = ttab.trace_to_dataframe(trace, chains=chain, include_transformed=True) df.to_csv(filename, index=False) diff --git a/pymc3/backends/tracetab.py b/pymc3/backends/tracetab.py index 39b5dc4ffa..c481cf2d55 100644 --- a/pymc3/backends/tracetab.py +++ b/pymc3/backends/tracetab.py @@ -6,7 +6,7 @@ from ..util import get_default_varnames -__all__ = ['trace_to_dataframe'] +__all__ = ["trace_to_dataframe"] def trace_to_dataframe(trace, chains=None, varnames=None, include_transformed=False): @@ -28,8 +28,9 @@ def trace_to_dataframe(trace, chains=None, varnames=None, include_transformed=Fa var_shapes = trace._straces[0].var_shapes if varnames is None: - varnames = get_default_varnames(var_shapes.keys(), - include_transformed=include_transformed) + varnames = get_default_varnames( + var_shapes.keys(), include_transformed=include_transformed + ) flat_names = {v: create_flat_names(v, var_shapes[v]) for v in varnames} @@ -56,13 +57,13 @@ def create_flat_names(varname, shape): return [varname] labels = (np.ravel(xs).tolist() for xs in np.indices(shape)) labels = (map(str, xs) for xs in labels) - return ['{}__{}'.format(varname, '_'.join(idxs)) for idxs in zip(*labels)] + return ["{}__{}".format(varname, "_".join(idxs)) for idxs in zip(*labels)] def _create_shape(flat_names): """Determine shape from `create_flat_names` output.""" try: - _, shape_str = flat_names[-1].rsplit('__', 1) + _, shape_str = flat_names[-1].rsplit("__", 1) except ValueError: return () - return tuple(int(i) + 1 for i in shape_str.split('_')) + return tuple(int(i) + 1 for i in shape_str.split("_")) diff --git a/pymc3/blocking.py b/pymc3/blocking.py index f95856229e..3b4bb35540 100644 --- a/pymc3/blocking.py +++ b/pymc3/blocking.py @@ -7,10 +7,10 @@ import numpy as np import collections -__all__ = ['ArrayOrdering', 'DictToArrayBijection', 'DictToVarBijection'] +__all__ = ["ArrayOrdering", "DictToArrayBijection", "DictToVarBijection"] -VarMap = collections.namedtuple('VarMap', 'var, slc, shp, dtyp') -DataMap = collections.namedtuple('DataMap', 'list_ind, slc, shp, dtype, name') +VarMap = collections.namedtuple("VarMap", "var, slc, shp, dtyp") +DataMap = collections.namedtuple("DataMap", "list_ind, slc, shp, dtype, name") # TODO Classes and methods need to be fully documented. @@ -29,11 +29,11 @@ def __init__(self, vars): for var in vars: name = var.name if name is None: - raise ValueError('Unnamed variable in ArrayOrdering.') + raise ValueError("Unnamed variable in ArrayOrdering.") if name in self.by_name: - raise ValueError('Name of variable not unique: %s.' % name) - if not hasattr(var, 'dshape') or not hasattr(var, 'dsize'): - raise ValueError('Shape of variable not known %s' % name) + raise ValueError("Name of variable not unique: %s." % name) + if not hasattr(var, "dshape") or not hasattr(var, "dsize"): + raise ValueError("Shape of variable not known %s" % name) slc = slice(self.size, self.size + var.dsize) varmap = VarMap(name, slc, var.dshape, var.dtype) @@ -55,12 +55,12 @@ def __init__(self, ordering, dpoint): self.dpt = dpoint # determine smallest float dtype that will fit all data - if all([x.dtyp == 'float16' for x in ordering.vmap]): - self.array_dtype = 'float16' - elif all([x.dtyp == 'float32' for x in ordering.vmap]): - self.array_dtype = 'float32' + if all([x.dtyp == "float16" for x in ordering.vmap]): + self.array_dtype = "float16" + elif all([x.dtyp == "float32" for x in ordering.vmap]): + self.array_dtype = "float32" else: - self.array_dtype = 'float64' + self.array_dtype = "float64" def map(self, dpt): """ @@ -119,22 +119,23 @@ class ListArrayOrdering(object): defining the input type 'tensor' or 'numpy' """ - def __init__(self, list_arrays, intype='numpy'): - if intype not in {'tensor', 'numpy'}: + def __init__(self, list_arrays, intype="numpy"): + if intype not in {"tensor", "numpy"}: raise ValueError("intype not in {'tensor', 'numpy'}") self.vmap = [] self.intype = intype self.size = 0 for array in list_arrays: - if self.intype == 'tensor': + if self.intype == "tensor": name = array.name array = array.tag.test_value else: - name = 'numpy' + name = "numpy" slc = slice(self.size, self.size + array.size) - self.vmap.append(DataMap( - len(self.vmap), slc, array.shape, array.dtype, name)) + self.vmap.append( + DataMap(len(self.vmap), slc, array.shape, array.dtype, name) + ) self.size += array.size @@ -211,8 +212,7 @@ def rmap(self, array): a_list = copy.copy(self.list_arrays) for list_ind, slc, shp, dtype, _ in self.ordering.vmap: - a_list[list_ind] = np.atleast_1d( - array)[slc].reshape(shp).astype(dtype) + a_list[list_ind] = np.atleast_1d(array)[slc].reshape(shp).astype(dtype) return a_list diff --git a/pymc3/data.py b/pymc3/data.py index c01784edcc..9bd7cbfec8 100644 --- a/pymc3/data.py +++ b/pymc3/data.py @@ -8,12 +8,7 @@ import theano.tensor as tt import theano -__all__ = [ - 'get_data', - 'GeneratorAdapter', - 'Minibatch', - 'align_minibatches' -] +__all__ = ["get_data", "GeneratorAdapter", "Minibatch", "align_minibatches"] def get_data(filename): @@ -27,8 +22,8 @@ def get_data(filename): ------- BytesIO of the data """ - data_pkg = 'pymc3.examples' - return io.BytesIO(pkgutil.get_data(data_pkg, os.path.join('data', filename))) + data_pkg = "pymc3.examples" + return io.BytesIO(pkgutil.get_data(data_pkg, os.path.join("data", filename))) class GenTensorVariable(tt.TensorVariable): @@ -61,14 +56,14 @@ def make_variable(self, gop, name=None): def __init__(self, generator): if not pm.vartypes.isgenerator(generator): - raise TypeError('Object should be generator like') + raise TypeError("Object should be generator like") self.test_value = pm.smartfloatX(copy(next(generator))) # make pickling potentially possible self._yielded_test_value = False self.gen = generator self.tensortype = tt.TensorType( - self.test_value.dtype, - ((False, ) * self.test_value.ndim)) + self.test_value.dtype, ((False,) * self.test_value.ndim) + ) # python3 generator def __next__(self): @@ -225,9 +220,18 @@ class Minibatch(tt.TensorVariable): RNG = collections.defaultdict(list) - @theano.configparser.change_flags(compute_test_value='raise') - def __init__(self, data, batch_size=128, dtype=None, broadcastable=None, name='Minibatch', - random_seed=42, update_shared_f=None, in_memory_size=None): + @theano.configparser.change_flags(compute_test_value="raise") + def __init__( + self, + data, + batch_size=128, + dtype=None, + broadcastable=None, + name="Minibatch", + random_seed=42, + update_shared_f=None, + in_memory_size=None, + ): if dtype is None: data = pm.smartfloatX(np.asarray(data)) else: @@ -235,17 +239,16 @@ def __init__(self, data, batch_size=128, dtype=None, broadcastable=None, name='M in_memory_slc = self.make_static_slices(in_memory_size) self.shared = theano.shared(data[in_memory_slc]) self.update_shared_f = update_shared_f - self.random_slc = self.make_random_slices(self.shared.shape, batch_size, random_seed) + self.random_slc = self.make_random_slices( + self.shared.shape, batch_size, random_seed + ) minibatch = self.shared[self.random_slc] if broadcastable is None: - broadcastable = (False, ) * minibatch.ndim + broadcastable = (False,) * minibatch.ndim minibatch = tt.patternbroadcast(minibatch, broadcastable) self.minibatch = minibatch - super(Minibatch, self).__init__( - self.minibatch.type, None, None, name=name) - theano.Apply( - theano.compile.view_op, - inputs=[self.minibatch], outputs=[self]) + super(Minibatch, self).__init__(self.minibatch.type, None, None, name=name) + theano.Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self]) self.tag.test_value = copy(self.minibatch.tag.test_value) def rslice(self, total, size, seed): @@ -254,11 +257,11 @@ def rslice(self, total, size, seed): elif isinstance(size, int): rng = pm.tt_rng(seed) Minibatch.RNG[id(self)].append(rng) - return (rng - .uniform(size=(size, ), low=0.0, high=pm.floatX(total) - 1e-16) - .astype('int64')) + return rng.uniform( + size=(size,), low=0.0, high=pm.floatX(total) - 1e-16 + ).astype("int64") else: - raise TypeError('Unrecognized size type, %r' % size) + raise TypeError("Unrecognized size type, %r" % size) def __del__(self): del Minibatch.RNG[id(self)] @@ -281,10 +284,10 @@ def make_static_slices(user_size): elif isinstance(i, slice): slc.append(i) else: - raise TypeError('Unrecognized size type, %r' % user_size) + raise TypeError("Unrecognized size type, %r" % user_size) return slc else: - raise TypeError('Unrecognized size type, %r' % user_size) + raise TypeError("Unrecognized size type, %r" % user_size) def make_random_slices(self, in_memory_shape, batch_size, default_random_seed): if batch_size is None: @@ -292,6 +295,7 @@ def make_random_slices(self, in_memory_shape, batch_size, default_random_seed): elif isinstance(batch_size, int): slc = [self.rslice(in_memory_shape[0], batch_size, default_random_seed)] elif isinstance(batch_size, (list, tuple)): + def check(t): if t is Ellipsis or t is None: return True @@ -305,12 +309,14 @@ def check(t): return True else: return False + # end check definition if not all(check(t) for t in batch_size): - raise TypeError('Unrecognized `batch_size` type, expected ' - 'int or List[int|tuple(size, random_seed)] where ' - 'size and random seed are both ints, got %r' % - batch_size) + raise TypeError( + "Unrecognized `batch_size` type, expected " + "int or List[int|tuple(size, random_seed)] where " + "size and random seed are both ints, got %r" % batch_size + ) batch_size = [ (i, default_random_seed) if isinstance(i, int) else i for i in batch_size @@ -319,12 +325,14 @@ def check(t): if Ellipsis in batch_size: sep = batch_size.index(Ellipsis) begin = batch_size[:sep] - end = batch_size[sep + 1:] + end = batch_size[sep + 1 :] if Ellipsis in end: - raise ValueError('Double Ellipsis in `batch_size` is restricted, got %r' % - batch_size) + raise ValueError( + "Double Ellipsis in `batch_size` is restricted, got %r" + % batch_size + ) if len(end) > 0: - shp_mid = shape[sep:-len(end)] + shp_mid = shape[sep : -len(end)] mid = [tt.arange(s) for s in shp_mid] else: mid = [] @@ -333,24 +341,31 @@ def check(t): end = [] mid = [] if (len(begin) + len(end)) > len(in_memory_shape.eval()): - raise ValueError('Length of `batch_size` is too big, ' - 'number of ints is bigger that ndim, got %r' - % batch_size) + raise ValueError( + "Length of `batch_size` is too big, " + "number of ints is bigger that ndim, got %r" % batch_size + ) if len(end) > 0: - shp_end = shape[-len(end):] + shp_end = shape[-len(end) :] else: shp_end = np.asarray([]) - shp_begin = shape[:len(begin)] - slc_begin = [self.rslice(shp_begin[i], t[0], t[1]) - if t is not None else tt.arange(shp_begin[i]) - for i, t in enumerate(begin)] - slc_end = [self.rslice(shp_end[i], t[0], t[1]) - if t is not None else tt.arange(shp_end[i]) - for i, t in enumerate(end)] + shp_begin = shape[: len(begin)] + slc_begin = [ + self.rslice(shp_begin[i], t[0], t[1]) + if t is not None + else tt.arange(shp_begin[i]) + for i, t in enumerate(begin) + ] + slc_end = [ + self.rslice(shp_end[i], t[0], t[1]) + if t is not None + else tt.arange(shp_end[i]) + for i, t in enumerate(end) + ] slc = slc_begin + mid + slc_end slc = slc else: - raise TypeError('Unrecognized size type, %r' % batch_size) + raise TypeError("Unrecognized size type, %r" % batch_size) return pm.theanof.ix_(*slc) def update_shared(self): @@ -376,6 +391,6 @@ def align_minibatches(batches=None): else: for b in batches: if not isinstance(b, Minibatch): - raise TypeError('{b} is not a Minibatch') + raise TypeError("{b} is not a Minibatch") for rng in Minibatch.RNG[id(b)]: rng.seed() diff --git a/pymc3/diagnostics.py b/pymc3/diagnostics.py index e44f27203a..4ae4fda450 100644 --- a/pymc3/diagnostics.py +++ b/pymc3/diagnostics.py @@ -5,12 +5,12 @@ from .util import get_default_varnames from .backends.base import MultiTrace -__all__ = ['geweke', 'gelman_rubin', 'effective_n'] +__all__ = ["geweke", "gelman_rubin", "effective_n"] @statfunc -def geweke(x, first=.1, last=.5, intervals=20): - R"""Return z-scores for convergence diagnostics. +def geweke(x, first=0.1, last=0.5, intervals=20): + r"""Return z-scores for convergence diagnostics. Compare the mean of the first % of series with the mean of the last % of series. x is divided into a number of segments for which this difference is @@ -58,14 +58,12 @@ def geweke(x, first=.1, last=.5, intervals=20): for interval in (first, last): if interval <= 0 or interval >= 1: raise ValueError( - "Invalid intervals for Geweke convergence analysis", - (first, - last)) + "Invalid intervals for Geweke convergence analysis", (first, last) + ) if first + last >= 1: raise ValueError( - "Invalid intervals for Geweke convergence analysis", - (first, - last)) + "Invalid intervals for Geweke convergence analysis", (first, last) + ) # Initialize list of z-scores zscores = [] @@ -77,14 +75,15 @@ def geweke(x, first=.1, last=.5, intervals=20): last_start_idx = (1 - last) * end # Calculate starting indices - start_indices = np.arange(0, int(last_start_idx), step=int( - (last_start_idx) / (intervals - 1))) + start_indices = np.arange( + 0, int(last_start_idx), step=int((last_start_idx) / (intervals - 1)) + ) # Loop over start indices for start in start_indices: # Calculate slices - first_slice = x[start: start + int(first * (end - start))] - last_slice = x[int(end - last * (end - start)):] + first_slice = x[start : start + int(first * (end - start))] + last_slice = x[int(end - last * (end - start)) :] z = first_slice.mean() - last_slice.mean() z /= np.sqrt(first_slice.var() + last_slice.var()) @@ -98,7 +97,7 @@ def geweke(x, first=.1, last=.5, intervals=20): def gelman_rubin(mtrace, varnames=None, include_transformed=False): - R"""Returns estimate of R for a set of traces. + r"""Returns estimate of R for a set of traces. The Gelman-Rubin diagnostic tests for lack of convergence by comparing the variance between multiple chains to the variance within each chain. @@ -160,11 +159,13 @@ def rscore(x, num_samples): if mtrace.nchains < 2: raise ValueError( - 'Gelman-Rubin diagnostic requires multiple chains ' - 'of the same length.') + "Gelman-Rubin diagnostic requires multiple chains " "of the same length." + ) if varnames is None: - varnames = get_default_varnames(mtrace.varnames, include_transformed=include_transformed) + varnames = get_default_varnames( + mtrace.varnames, include_transformed=include_transformed + ) Rhat = {} @@ -177,7 +178,7 @@ def rscore(x, num_samples): def effective_n(mtrace, varnames=None, include_transformed=False): - R"""Returns estimate of the effective sample size of a set of traces. + r"""Returns estimate of the effective sample size of a set of traces. Parameters ---------- @@ -221,23 +222,23 @@ def get_neff(x): acov = np.asarray([autocov(trace_value[chain]) for chain in range(nchain)]) chain_mean = trace_value.mean(axis=1) - chain_var = acov[:, 0] * n_samples / (n_samples - 1.) - acov_t = acov[:, 1] * n_samples / (n_samples - 1.) + chain_var = acov[:, 0] * n_samples / (n_samples - 1.0) + acov_t = acov[:, 1] * n_samples / (n_samples - 1.0) mean_var = np.mean(chain_var) - var_plus = mean_var * (n_samples - 1.) / n_samples + var_plus = mean_var * (n_samples - 1.0) / n_samples var_plus += np.var(chain_mean, ddof=1) rho_hat_t = np.zeros(n_samples) - rho_hat_even = 1. + rho_hat_even = 1.0 rho_hat_t[0] = rho_hat_even - rho_hat_odd = 1. - (mean_var - np.mean(acov_t)) / var_plus + rho_hat_odd = 1.0 - (mean_var - np.mean(acov_t)) / var_plus rho_hat_t[1] = rho_hat_odd # Geyer's initial positive sequence max_t = 1 t = 1 - while t < (n_samples - 2) and (rho_hat_even + rho_hat_odd) >= 0.: - rho_hat_even = 1. - (mean_var - np.mean(acov[:, t + 1])) / var_plus - rho_hat_odd = 1. - (mean_var - np.mean(acov[:, t + 2])) / var_plus + while t < (n_samples - 2) and (rho_hat_even + rho_hat_odd) >= 0.0: + rho_hat_even = 1.0 - (mean_var - np.mean(acov[:, t + 1])) / var_plus + rho_hat_odd = 1.0 - (mean_var - np.mean(acov[:, t + 2])) / var_plus if (rho_hat_even + rho_hat_odd) >= 0: rho_hat_t[t + 1] = rho_hat_even rho_hat_t[t + 2] = rho_hat_odd @@ -247,12 +248,14 @@ def get_neff(x): # Geyer's initial monotone sequence t = 3 while t <= max_t - 2: - if (rho_hat_t[t + 1] + rho_hat_t[t + 2]) > (rho_hat_t[t - 1] + rho_hat_t[t]): - rho_hat_t[t + 1] = (rho_hat_t[t - 1] + rho_hat_t[t]) / 2. + if (rho_hat_t[t + 1] + rho_hat_t[t + 2]) > ( + rho_hat_t[t - 1] + rho_hat_t[t] + ): + rho_hat_t[t + 1] = (rho_hat_t[t - 1] + rho_hat_t[t]) / 2.0 rho_hat_t[t + 2] = rho_hat_t[t + 1] t += 2 ess = nchain * n_samples - ess = ess / (-1. + 2. * np.sum(rho_hat_t)) + ess = ess / (-1.0 + 2.0 * np.sum(rho_hat_t)) return ess def generate_neff(trace_values): @@ -288,11 +291,14 @@ def generate_neff(trace_values): if mtrace.nchains < 2: raise ValueError( - 'Calculation of effective sample size requires multiple chains ' - 'of the same length.') + "Calculation of effective sample size requires multiple chains " + "of the same length." + ) if varnames is None: - varnames = get_default_varnames(mtrace.varnames,include_transformed=include_transformed) + varnames = get_default_varnames( + mtrace.varnames, include_transformed=include_transformed + ) n_eff = {} diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 3756a738ec..5787f88a8b 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -86,74 +86,75 @@ from .bound import Bound -__all__ = ['Uniform', - 'Flat', - 'HalfFlat', - 'TruncatedNormal', - 'Normal', - 'Beta', - 'Kumaraswamy', - 'Exponential', - 'Laplace', - 'StudentT', - 'Cauchy', - 'HalfCauchy', - 'Gamma', - 'Weibull', - 'Bound', - 'Lognormal', - 'HalfStudentT', - 'ChiSquared', - 'HalfNormal', - 'Wald', - 'Pareto', - 'InverseGamma', - 'ExGaussian', - 'VonMises', - 'Binomial', - 'BetaBinomial', - 'Bernoulli', - 'Poisson', - 'NegativeBinomial', - 'ConstantDist', - 'Constant', - 'ZeroInflatedPoisson', - 'ZeroInflatedNegativeBinomial', - 'ZeroInflatedBinomial', - 'DiscreteUniform', - 'Geometric', - 'Categorical', - 'OrderedLogistic', - 'DensityDist', - 'Distribution', - 'Continuous', - 'Discrete', - 'NoDistribution', - 'TensorType', - 'MvNormal', - 'MatrixNormal', - 'KroneckerNormal', - 'MvStudentT', - 'Dirichlet', - 'Multinomial', - 'Wishart', - 'WishartBartlett', - 'LKJCholeskyCov', - 'LKJCorr', - 'AR1', - 'AR', - 'GaussianRandomWalk', - 'MvGaussianRandomWalk', - 'MvStudentTRandomWalk', - 'GARCH11', - 'SkewNormal', - 'Mixture', - 'NormalMixture', - 'Triangular', - 'DiscreteWeibull', - 'Gumbel', - 'Logistic', - 'LogitNormal', - 'Interpolated', - 'Bound', - ] +__all__ = [ + "Uniform", + "Flat", + "HalfFlat", + "TruncatedNormal", + "Normal", + "Beta", + "Kumaraswamy", + "Exponential", + "Laplace", + "StudentT", + "Cauchy", + "HalfCauchy", + "Gamma", + "Weibull", + "Bound", + "Lognormal", + "HalfStudentT", + "ChiSquared", + "HalfNormal", + "Wald", + "Pareto", + "InverseGamma", + "ExGaussian", + "VonMises", + "Binomial", + "BetaBinomial", + "Bernoulli", + "Poisson", + "NegativeBinomial", + "ConstantDist", + "Constant", + "ZeroInflatedPoisson", + "ZeroInflatedNegativeBinomial", + "ZeroInflatedBinomial", + "DiscreteUniform", + "Geometric", + "Categorical", + "OrderedLogistic", + "DensityDist", + "Distribution", + "Continuous", + "Discrete", + "NoDistribution", + "TensorType", + "MvNormal", + "MatrixNormal", + "KroneckerNormal", + "MvStudentT", + "Dirichlet", + "Multinomial", + "Wishart", + "WishartBartlett", + "LKJCholeskyCov", + "LKJCorr", + "AR1", + "AR", + "GaussianRandomWalk", + "MvGaussianRandomWalk", + "MvStudentTRandomWalk", + "GARCH11", + "SkewNormal", + "Mixture", + "NormalMixture", + "Triangular", + "DiscreteWeibull", + "Gumbel", + "Logistic", + "LogitNormal", + "Interpolated", + "Bound", +] diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py index f2928c9702..192e773a10 100644 --- a/pymc3/distributions/bound.py +++ b/pymc3/distributions/bound.py @@ -5,11 +5,16 @@ import theano from pymc3.distributions.distribution import ( - Distribution, Discrete, Continuous, draw_values, generate_samples) + Distribution, + Discrete, + Continuous, + draw_values, + generate_samples, +) from pymc3.distributions import transforms from pymc3.distributions.dist_math import bound -__all__ = ['Bound'] +__all__ = ["Bound"] class _Bounded(Distribution): @@ -23,7 +28,7 @@ def __init__(self, distribution, lower, upper, default, *args, **kwargs): for name in defaults: setattr(self, name, getattr(self._wrapped, name)) else: - defaults = ('_default',) + defaults = ("_default",) self._default = default super(_Bounded, self).__init__( @@ -31,7 +36,8 @@ def __init__(self, distribution, lower, upper, default, *args, **kwargs): dtype=self._wrapped.dtype, testval=self._wrapped.testval, defaults=defaults, - transform=self._wrapped.transform) + transform=self._wrapped.transform, + ) def logp(self, value): logp = self._wrapped.logp(value) @@ -49,15 +55,17 @@ def _random(self, lower, upper, point=None, size=None): lower = np.asarray(lower) upper = np.asarray(upper) if lower.size > 1 or upper.size > 1: - raise ValueError('Drawing samples from distributions with ' - 'array-valued bounds is not supported.') + raise ValueError( + "Drawing samples from distributions with " + "array-valued bounds is not supported." + ) samples = np.zeros(size, dtype=self.dtype).flatten() i, n = 0, len(samples) while i < len(samples): sample = np.atleast_1d(self._wrapped.random(point=point, size=n)) select = sample[np.logical_and(sample >= lower, sample <= upper)] - samples[i:(i + len(select))] = select[:] + samples[i : (i + len(select))] = select[:] i += len(select) n -= len(select) if size is not None: @@ -70,28 +78,27 @@ def random(self, point=None, size=None): return self._wrapped.random(point=point, size=size) elif self.lower is not None and self.upper is not None: lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - return generate_samples(self._random, lower, upper, point, - dist_shape=self.shape, - size=size) + return generate_samples( + self._random, lower, upper, point, dist_shape=self.shape, size=size + ) elif self.lower is not None: lower = draw_values([self.lower], point=point, size=size) - return generate_samples(self._random, lower, np.inf, point, - dist_shape=self.shape, - size=size) + return generate_samples( + self._random, lower, np.inf, point, dist_shape=self.shape, size=size + ) else: upper = draw_values([self.upper], point=point, size=size) - return generate_samples(self._random, -np.inf, upper, point, - dist_shape=self.shape, - size=size) + return generate_samples( + self._random, -np.inf, upper, point, dist_shape=self.shape, size=size + ) class _DiscreteBounded(_Bounded, Discrete): - def __init__(self, distribution, lower, upper, - transform='infer', *args, **kwargs): - if transform == 'infer': + def __init__(self, distribution, lower, upper, transform="infer", *args, **kwargs): + if transform == "infer": transform = None if transform is not None: - raise ValueError('Can not transform discrete variable.') + raise ValueError("Can not transform discrete variable.") if lower is None and upper is None: default = None @@ -103,12 +110,17 @@ def __init__(self, distribution, lower, upper, default = lower + 1 super(_DiscreteBounded, self).__init__( - distribution=distribution, lower=lower, upper=upper, - default=default, *args, **kwargs) + distribution=distribution, + lower=lower, + upper=upper, + default=default, + *args, + **kwargs + ) class _ContinuousBounded(_Bounded, Continuous): - R""" + r""" An upper, lower or upper+lower bounded distribution Parameters @@ -125,16 +137,15 @@ class _ContinuousBounded(_Bounded, Continuous): See pymc3.distributions.transforms for more information. """ - def __init__(self, distribution, lower, upper, - transform='infer', *args, **kwargs): - dtype = kwargs.get('dtype', theano.config.floatX) + def __init__(self, distribution, lower, upper, transform="infer", *args, **kwargs): + dtype = kwargs.get("dtype", theano.config.floatX) if lower is not None: lower = tt.as_tensor_variable(lower).astype(dtype) if upper is not None: upper = tt.as_tensor_variable(upper).astype(dtype) - if transform == 'infer': + if transform == "infer": if lower is None and upper is None: transform = None default = None @@ -151,12 +162,18 @@ def __init__(self, distribution, lower, upper, default = None super(_ContinuousBounded, self).__init__( - distribution=distribution, lower=lower, upper=upper, - transform=transform, default=default, *args, **kwargs) + distribution=distribution, + lower=lower, + upper=upper, + transform=transform, + default=default, + *args, + **kwargs + ) class Bound(object): - R""" + r""" Create a Bound variable object that can be applied to create a new upper, lower, or upper and lower bounded distribution. @@ -207,30 +224,35 @@ def __init__(self, distribution, lower=None, upper=None): self.upper = upper def __call__(self, name, *args, **kwargs): - if 'observed' in kwargs: - raise ValueError('Observed Bound distributions are not supported. ' - 'If you want to model truncated data ' - 'you can use a pm.Potential in combination ' - 'with the cumulative probability function. See ' - 'pymc3/examples/censored_data.py for an example.') + if "observed" in kwargs: + raise ValueError( + "Observed Bound distributions are not supported. " + "If you want to model truncated data " + "you can use a pm.Potential in combination " + "with the cumulative probability function. See " + "pymc3/examples/censored_data.py for an example." + ) if issubclass(self.distribution, Continuous): - return _ContinuousBounded(name, self.distribution, - self.lower, self.upper, *args, **kwargs) + return _ContinuousBounded( + name, self.distribution, self.lower, self.upper, *args, **kwargs + ) elif issubclass(self.distribution, Discrete): - return _DiscreteBounded(name, self.distribution, - self.lower, self.upper, *args, **kwargs) + return _DiscreteBounded( + name, self.distribution, self.lower, self.upper, *args, **kwargs + ) else: - raise ValueError( - 'Distribution is neither continuous nor discrete.') + raise ValueError("Distribution is neither continuous nor discrete.") def dist(self, *args, **kwargs): if issubclass(self.distribution, Continuous): return _ContinuousBounded.dist( - self.distribution, self.lower, self.upper, *args, **kwargs) + self.distribution, self.lower, self.upper, *args, **kwargs + ) elif issubclass(self.distribution, Discrete): return _DiscreteBounded.dist( - self.distribution, self.lower, self.upper, *args, **kwargs) + self.distribution, self.lower, self.upper, *args, **kwargs + ) else: - raise ValueError('Distribution is neither continuous nor discrete.') + raise ValueError("Distribution is neither continuous nor discrete.") diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 984ca3e577..bb66eeceaa 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -20,45 +20,78 @@ from .special import log_i0 from ..math import invlogit, logit, logdiffexp from .dist_math import ( - alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow, - normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue, + alltrue_elemwise, + betaln, + bound, + gammaln, + i0e, + incomplete_beta, + logpow, + normal_lccdf, + normal_lcdf, + SplineWrapper, + std_cdf, + zvalue, ) from .distribution import Continuous, draw_values, generate_samples -__all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'TruncatedNormal', 'Beta', - 'Kumaraswamy', 'Exponential', 'Laplace', 'StudentT', 'Cauchy', - 'HalfCauchy', 'Gamma', 'Weibull', 'HalfStudentT', 'Lognormal', - 'ChiSquared', 'HalfNormal', 'Wald', 'Pareto', 'InverseGamma', - 'ExGaussian', 'VonMises', 'SkewNormal', 'Triangular', 'Gumbel', - 'Logistic', 'LogitNormal', 'Interpolated', 'Rice'] +__all__ = [ + "Uniform", + "Flat", + "HalfFlat", + "Normal", + "TruncatedNormal", + "Beta", + "Kumaraswamy", + "Exponential", + "Laplace", + "StudentT", + "Cauchy", + "HalfCauchy", + "Gamma", + "Weibull", + "HalfStudentT", + "Lognormal", + "ChiSquared", + "HalfNormal", + "Wald", + "Pareto", + "InverseGamma", + "ExGaussian", + "VonMises", + "SkewNormal", + "Triangular", + "Gumbel", + "Logistic", + "LogitNormal", + "Interpolated", + "Rice", +] class PositiveContinuous(Continuous): """Base class for positive continuous distributions""" def __init__(self, transform=transforms.log, *args, **kwargs): - super(PositiveContinuous, self).__init__( - transform=transform, *args, **kwargs) + super(PositiveContinuous, self).__init__(transform=transform, *args, **kwargs) class UnitContinuous(Continuous): """Base class for continuous distributions on [0,1]""" def __init__(self, transform=transforms.logodds, *args, **kwargs): - super(UnitContinuous, self).__init__( - transform=transform, *args, **kwargs) + super(UnitContinuous, self).__init__(transform=transform, *args, **kwargs) class BoundedContinuous(Continuous): """Base class for bounded continuous distributions""" - def __init__(self, transform='auto', lower=None, upper=None, - *args, **kwargs): + def __init__(self, transform="auto", lower=None, upper=None, *args, **kwargs): lower = tt.as_tensor_variable(lower) if lower is not None else None upper = tt.as_tensor_variable(upper) if upper is not None else None - if transform == 'auto': + if transform == "auto": if lower is None and upper is None: transform = None elif lower is not None and upper is None: @@ -68,8 +101,7 @@ def __init__(self, transform='auto', lower=None, upper=None, else: transform = transforms.interval(lower, upper) - super(BoundedContinuous, self).__init__( - transform=transform, *args, **kwargs) + super(BoundedContinuous, self).__init__(transform=transform, *args, **kwargs) def assert_negative_support(var, label, distname, value=-1e-6): @@ -78,8 +110,9 @@ def assert_negative_support(var, label, distname, value=-1e-6): return try: # Transformed distribution - support = np.isfinite(var.transformed.distribution.dist - .logp(value).tag.test_value) + support = np.isfinite( + var.transformed.distribution.dist.logp(value).tag.test_value + ) except AttributeError: try: # Untransformed distribution @@ -90,7 +123,8 @@ def assert_negative_support(var, label, distname, value=-1e-6): if np.any(support): msg = "The variable specified for {0} has negative support for {1}, ".format( - label, distname) + label, distname + ) msg += "likely making it unsuitable for this parameter." warnings.warn(msg) @@ -118,27 +152,27 @@ def get_tau_sd(tau=None, sd=None): """ if tau is None: if sd is None: - sd = 1. - tau = 1. + sd = 1.0 + tau = 1.0 else: - tau = sd**-2. + tau = sd ** -2.0 else: if sd is not None: raise ValueError("Can't pass both tau and sd") else: - sd = tau**-.5 + sd = tau ** -0.5 # cast tau and sd to float in a way that works for both np.arrays # and pure python - tau = 1. * tau - sd = 1. * sd + tau = 1.0 * tau + sd = 1.0 * sd return floatX(tau), floatX(sd) class Uniform(BoundedContinuous): - R""" + r""" Continuous uniform log-likelihood. The pdf of this distribution is @@ -182,11 +216,10 @@ class Uniform(BoundedContinuous): def __init__(self, lower=0, upper=1, *args, **kwargs): self.lower = lower = tt.as_tensor_variable(floatX(lower)) self.upper = upper = tt.as_tensor_variable(floatX(upper)) - self.mean = (upper + lower) / 2. + self.mean = (upper + lower) / 2.0 self.median = self.mean - super(Uniform, self).__init__( - lower=lower, upper=upper, *args, **kwargs) + super(Uniform, self).__init__(lower=lower, upper=upper, *args, **kwargs) def random(self, point=None, size=None): """ @@ -206,12 +239,14 @@ def random(self, point=None, size=None): array """ - lower, upper = draw_values([self.lower, self.upper], - point=point, size=size) - return generate_samples(stats.uniform.rvs, loc=lower, - scale=upper - lower, - dist_shape=self.shape, - size=size) + lower, upper = draw_values([self.lower, self.upper], point=point, size=size) + return generate_samples( + stats.uniform.rvs, + loc=lower, + scale=upper - lower, + dist_shape=self.shape, + size=size, + ) def logp(self, value): """ @@ -228,17 +263,17 @@ def logp(self, value): """ lower = self.lower upper = self.upper - return bound(-tt.log(upper - lower), - value >= lower, value <= upper) + return bound(-tt.log(upper - lower), value >= lower, value <= upper) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self lower = dist.lower upper = dist.upper - name = r'\text{%s}' % name - return r'${} \sim \text{{Uniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$'.format( - name, get_variable_name(lower), get_variable_name(upper)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Uniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$".format( + name, get_variable_name(lower), get_variable_name(upper) + ) def logcdf(self, value): return tt.switch( @@ -247,9 +282,8 @@ def logcdf(self, value): tt.switch( tt.eq(value, self.upper), 0, - tt.log((value - self.lower)) - - tt.log((self.upper - self.lower)) - ) + tt.log((value - self.lower)) - tt.log((self.upper - self.lower)), + ), ) @@ -261,7 +295,7 @@ class Flat(Continuous): def __init__(self, *args, **kwargs): self._default = 0 - super(Flat, self).__init__(defaults=('_default',), *args, **kwargs) + super(Flat, self).__init__(defaults=("_default",), *args, **kwargs) def random(self, point=None, size=None): """Raises ValueError as it is not possible to sample from Flat distribution @@ -275,7 +309,7 @@ def random(self, point=None, size=None): ------- ValueError """ - raise ValueError('Cannot sample from Flat distribution') + raise ValueError("Cannot sample from Flat distribution") def logp(self, value): """ @@ -294,18 +328,14 @@ def logp(self, value): return tt.zeros_like(value) def _repr_latex_(self, name=None, dist=None): - name = r'\text{%s}' % name - return r'${} \sim \text{{Flat}}()$'.format(name) + name = r"\text{%s}" % name + return r"${} \sim \text{{Flat}}()$".format(name) def logcdf(self, value): return tt.switch( tt.eq(value, -np.inf), -np.inf, - tt.switch( - tt.eq(value, np.inf), - 0, - tt.log(0.5) - ) + tt.switch(tt.eq(value, np.inf), 0, tt.log(0.5)), ) @@ -314,7 +344,7 @@ class HalfFlat(PositiveContinuous): def __init__(self, *args, **kwargs): self._default = 1 - super(HalfFlat, self).__init__(defaults=('_default',), *args, **kwargs) + super(HalfFlat, self).__init__(defaults=("_default",), *args, **kwargs) def random(self, point=None, size=None): """Raises ValueError as it is not possible to sample from HalfFlat distribution @@ -328,7 +358,7 @@ def random(self, point=None, size=None): ------- ValueError """ - raise ValueError('Cannot sample from HalfFlat distribution') + raise ValueError("Cannot sample from HalfFlat distribution") def logp(self, value): """ @@ -347,23 +377,17 @@ def logp(self, value): return bound(tt.zeros_like(value), value > 0) def _repr_latex_(self, name=None, dist=None): - name = r'\text{%s}' % name - return r'${} \sim \text{{HalfFlat}}()$'.format(name) + name = r"\text{%s}" % name + return r"${} \sim \text{{HalfFlat}}()$".format(name) def logcdf(self, value): return tt.switch( - tt.lt(value, np.inf), - -np.inf, - tt.switch( - tt.eq(value, np.inf), - 0, - -np.inf - ) + tt.lt(value, np.inf), -np.inf, tt.switch(tt.eq(value, np.inf), 0, -np.inf) ) class Normal(Continuous): - R""" + r""" Univariate normal log-likelihood. The pdf of this distribution is @@ -431,10 +455,10 @@ def __init__(self, mu=0, sd=None, tau=None, **kwargs): self.tau = tt.as_tensor_variable(tau) self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu) - self.variance = 1. / self.tau + self.variance = 1.0 / self.tau - assert_negative_support(sd, 'sd', 'Normal') - assert_negative_support(tau, 'tau', 'Normal') + assert_negative_support(sd, "sd", "Normal") + assert_negative_support(tau, "tau", "Normal") super(Normal, self).__init__(**kwargs) @@ -455,11 +479,10 @@ def random(self, point=None, size=None): ------- array """ - mu, tau, _ = draw_values([self.mu, self.tau, self.sd], - point=point, size=size) - return generate_samples(stats.norm.rvs, loc=mu, scale=tau**-0.5, - dist_shape=self.shape, - size=size) + mu, tau, _ = draw_values([self.mu, self.tau, self.sd], point=point, size=size) + return generate_samples( + stats.norm.rvs, loc=mu, scale=tau ** -0.5, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -479,25 +502,26 @@ def logp(self, value): tau = self.tau mu = self.mu - return bound((-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2., - sd > 0) + return bound( + (-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0, sd > 0 + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self sd = dist.sd mu = dist.mu - name = r'\text{%s}' % name - return r'${} \sim \text{{Normal}}(\mathit{{mu}}={},~\mathit{{sd}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(sd)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Normal}}(\mathit{{mu}}={},~\mathit{{sd}}={})$".format( + name, get_variable_name(mu), get_variable_name(sd) + ) def logcdf(self, value): return normal_lcdf(self.mu, self.sd, value) class TruncatedNormal(BoundedContinuous): - R""" + r""" Univariate truncated normal log-likelihood. The pdf of this distribution is @@ -570,8 +594,17 @@ class TruncatedNormal(BoundedContinuous): """ - def __init__(self, mu=0, sd=None, tau=None, lower=None, upper=None, - transform='auto', *args, **kwargs): + def __init__( + self, + mu=0, + sd=None, + tau=None, + lower=None, + upper=None, + transform="auto", + *args, + **kwargs + ): tau, sd = get_tau_sd(tau=tau, sd=sd) self.sd = tt.as_tensor_variable(sd) self.tau = tt.as_tensor_variable(tau) @@ -582,18 +615,23 @@ def __init__(self, mu=0, sd=None, tau=None, lower=None, upper=None, if self.lower is None and self.upper is None: self._defaultval = mu elif self.lower is None and self.upper is not None: - self._defaultval = self.upper - 1. + self._defaultval = self.upper - 1.0 elif self.lower is not None and self.upper is None: - self._defaultval = self.lower + 1. + self._defaultval = self.lower + 1.0 else: self._defaultval = (self.lower + self.upper) / 2 - assert_negative_support(sd, 'sd', 'TruncatedNormal') - assert_negative_support(tau, 'tau', 'TruncatedNormal') + assert_negative_support(sd, "sd", "TruncatedNormal") + assert_negative_support(tau, "tau", "TruncatedNormal") super(TruncatedNormal, self).__init__( - defaults=('_defaultval',), transform=transform, - lower=lower, upper=upper, *args, **kwargs) + defaults=("_defaultval",), + transform=transform, + lower=lower, + upper=upper, + *args, + **kwargs + ) def random(self, point=None, size=None): """ @@ -613,15 +651,17 @@ def random(self, point=None, size=None): array """ mu_v, std_v, a_v, b_v = draw_values( - [self.mu, self.sd, self.lower, self.upper], point=point, size=size) - return generate_samples(stats.truncnorm.rvs, - a=(a_v - mu_v)/std_v, - b=(b_v - mu_v) / std_v, - loc=mu_v, - scale=std_v, - dist_shape=self.shape, - size=size, - ) + [self.mu, self.sd, self.lower, self.upper], point=point, size=size + ) + return generate_samples( + stats.truncnorm.rvs, + a=(a_v - mu_v) / std_v, + b=(b_v - mu_v) / std_v, + loc=mu_v, + scale=std_v, + dist_shape=self.shape, + size=size, + ) def logp(self, value): """ @@ -654,7 +694,7 @@ def _normalization(self): mu, sd = self.mu, self.sd if self.lower is None and self.upper is None: - return 0. + return 0.0 if self.lower is not None and self.upper is not None: lcdf_a = normal_lcdf(mu, sd, self.lower) @@ -663,9 +703,7 @@ def _normalization(self): lsf_b = normal_lccdf(mu, sd, self.upper) return tt.switch( - self.lower > 0, - logdiffexp(lsf_a, lsf_b), - logdiffexp(lcdf_b, lcdf_a), + self.lower > 0, logdiffexp(lsf_a, lsf_b), logdiffexp(lcdf_b, lcdf_a) ) if self.lower is not None: @@ -676,11 +714,10 @@ def _normalization(self): def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self - name = r'\text{%s}' % name + name = r"\text{%s}" % name return ( - r'${} \sim \text{{TruncatedNormal}}(' - '\mathit{{mu}}={},~\mathit{{sd}}={},a={},b={})$' - .format( + r"${} \sim \text{{TruncatedNormal}}(" + "\mathit{{mu}}={},~\mathit{{sd}}={},a={},b={})$".format( name, get_variable_name(self.mu), get_variable_name(self.sd), @@ -691,7 +728,7 @@ def _repr_latex_(self, name=None, dist=None): class HalfNormal(PositiveContinuous): - R""" + r""" Half-normal log-likelihood. The pdf of this distribution is @@ -761,10 +798,10 @@ def __init__(self, sd=None, tau=None, *args, **kwargs): self.tau = tau = tt.as_tensor_variable(tau) self.mean = tt.sqrt(2 / (np.pi * self.tau)) - self.variance = (1. - 2 / np.pi) / self.tau + self.variance = (1.0 - 2 / np.pi) / self.tau - assert_negative_support(tau, 'tau', 'HalfNormal') - assert_negative_support(sd, 'sd', 'HalfNormal') + assert_negative_support(tau, "tau", "HalfNormal") + assert_negative_support(sd, "sd", "HalfNormal") def random(self, point=None, size=None): """ @@ -784,9 +821,9 @@ def random(self, point=None, size=None): array """ sd = draw_values([self.sd], point=point)[0] - return generate_samples(stats.halfnorm.rvs, loc=0., scale=sd, - dist_shape=self.shape, - size=size) + return generate_samples( + stats.halfnorm.rvs, loc=0.0, scale=sd, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -804,30 +841,34 @@ def logp(self, value): """ tau = self.tau sd = self.sd - return bound(-0.5 * tau * value**2 + 0.5 * tt.log(tau * 2. / np.pi), - value >= 0, - tau > 0, sd > 0) + return bound( + -0.5 * tau * value ** 2 + 0.5 * tt.log(tau * 2.0 / np.pi), + value >= 0, + tau > 0, + sd > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self sd = dist.sd - name = r'\text{%s}' % name - return r'${} \sim \text{{HalfNormal}}(\mathit{{sd}}={})$'.format(name, - get_variable_name(sd)) + name = r"\text{%s}" % name + return r"${} \sim \text{{HalfNormal}}(\mathit{{sd}}={})$".format( + name, get_variable_name(sd) + ) def logcdf(self, value): sd = self.sd z = zvalue(value, mu=0, sd=sd) return tt.switch( tt.lt(z, -1.0), - tt.log(tt.erfcx(-z / tt.sqrt(2.))) - tt.sqr(z), - tt.log1p(-tt.erfc(z / tt.sqrt(2.))) + tt.log(tt.erfcx(-z / tt.sqrt(2.0))) - tt.sqr(z), + tt.log1p(-tt.erfc(z / tt.sqrt(2.0))), ) class Wald(PositiveContinuous): - R""" + r""" Wald log-likelihood. The pdf of this distribution is @@ -904,7 +945,7 @@ class Wald(PositiveContinuous): statmod: Probability Calculations for the Inverse Gaussian Distribution """ - def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): + def __init__(self, mu=None, lam=None, phi=None, alpha=0.0, *args, **kwargs): super(Wald, self).__init__(*args, **kwargs) mu, lam, phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha = tt.as_tensor_variable(alpha) @@ -913,13 +954,19 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): self.phi = phi = tt.as_tensor_variable(phi) self.mean = self.mu + self.alpha - self.mode = self.mu * (tt.sqrt(1. + (1.5 * self.mu / self.lam)**2) - - 1.5 * self.mu / self.lam) + self.alpha - self.variance = (self.mu**3) / self.lam + self.mode = ( + self.mu + * ( + tt.sqrt(1.0 + (1.5 * self.mu / self.lam) ** 2) + - 1.5 * self.mu / self.lam + ) + + self.alpha + ) + self.variance = (self.mu ** 3) / self.lam - assert_negative_support(phi, 'phi', 'Wald') - assert_negative_support(mu, 'mu', 'Wald') - assert_negative_support(lam, 'lam', 'Wald') + assert_negative_support(phi, "phi", "Wald") + assert_negative_support(mu, "mu", "Wald") + assert_negative_support(lam, "lam", "Wald") def get_mu_lam_phi(self, mu, lam, phi): if mu is None: @@ -928,23 +975,28 @@ def get_mu_lam_phi(self, mu, lam, phi): else: if lam is None: if phi is None: - return mu, 1., 1. / mu + return mu, 1.0, 1.0 / mu else: return mu, mu * phi, phi else: if phi is None: return mu, lam, lam / mu - raise ValueError('Wald distribution must specify either mu only, ' - 'mu and lam, mu and phi, or lam and phi.') + raise ValueError( + "Wald distribution must specify either mu only, " + "mu and lam, mu and phi, or lam and phi." + ) def _random(self, mu, lam, alpha, size=None): - v = np.random.normal(size=size)**2 - value = (mu + (mu**2) * v / (2. * lam) - mu / (2. * lam) - * np.sqrt(4. * mu * lam * v + (mu * v)**2)) + v = np.random.normal(size=size) ** 2 + value = ( + mu + + (mu ** 2) * v / (2.0 * lam) + - mu / (2.0 * lam) * np.sqrt(4.0 * mu * lam * v + (mu * v) ** 2) + ) z = np.random.uniform(size=size) i = np.floor(z - mu / (mu + value)) * 2 + 1 - value = (value**-i) * (mu**(i + 1)) + value = (value ** -i) * (mu ** (i + 1)) return value + alpha def random(self, point=None, size=None): @@ -964,12 +1016,12 @@ def random(self, point=None, size=None): ------- array """ - mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha], - point=point, size=size) - return generate_samples(self._random, - mu, lam, alpha, - dist_shape=self.shape, - size=size) + mu, lam, alpha = draw_values( + [self.mu, self.lam, self.alpha], point=point, size=size + ) + return generate_samples( + self._random, mu, lam, alpha, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -989,13 +1041,17 @@ def logp(self, value): lam = self.lam alpha = self.alpha # value *must* be iid. Otherwise this is wrong. - return bound(logpow(lam / (2. * np.pi), 0.5) - - logpow(value - alpha, 1.5) - - (0.5 * lam / (value - alpha) - * ((value - alpha - mu) / mu)**2), - # XXX these two are redundant. Please, check. - value > 0, value - alpha > 0, - mu > 0, lam > 0, alpha >= 0) + return bound( + logpow(lam / (2.0 * np.pi), 0.5) + - logpow(value - alpha, 1.5) + - (0.5 * lam / (value - alpha) * ((value - alpha - mu) / mu) ** 2), + # XXX these two are redundant. Please, check. + value > 0, + value - alpha > 0, + mu > 0, + lam > 0, + alpha >= 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -1003,11 +1059,13 @@ def _repr_latex_(self, name=None, dist=None): lam = dist.lam mu = dist.mu alpha = dist.alpha - name = r'\text{%s}' % name - return r'${} \sim \text{{Wald}}(\mathit{{mu}}={},~\mathit{{lam}}={},~\mathit{{alpha}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(lam), - get_variable_name(alpha)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Wald}}(\mathit{{mu}}={},~\mathit{{lam}}={},~\mathit{{alpha}}={})$".format( + name, + get_variable_name(mu), + get_variable_name(lam), + get_variable_name(alpha), + ) def logcdf(self, value): # Distribution parameters @@ -1020,38 +1078,35 @@ def logcdf(self, value): l = lam * mu r = tt.sqrt(value * lam) - a = normal_lcdf(0, 1, (q - 1.)/r) - b = 2./l + normal_lcdf(0, 1, -(q + 1.)/r) + a = normal_lcdf(0, 1, (q - 1.0) / r) + b = 2.0 / l + normal_lcdf(0, 1, -(q + 1.0) / r) return tt.switch( ( # Left limit - tt.lt(value, 0) | - (tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) | - (tt.lt(value, mu) & tt.eq(lam, 0)) + tt.lt(value, 0) + | (tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) + | (tt.lt(value, mu) & tt.eq(lam, 0)) ), -np.inf, tt.switch( ( # Right limit - tt.eq(value, np.inf) | - (tt.eq(lam, 0) & tt.gt(value, mu)) | - (tt.gt(value, 0) & tt.eq(lam, np.inf)) | + tt.eq(value, np.inf) + | (tt.eq(lam, 0) & tt.gt(value, mu)) + | (tt.gt(value, 0) & tt.eq(lam, np.inf)) + | # Degenerate distribution - ( - tt.lt(mu, np.inf) & - tt.eq(mu, value) & - tt.eq(lam, 0) - ) | - (tt.eq(value, 0) & tt.eq(lam, np.inf)) + (tt.lt(mu, np.inf) & tt.eq(mu, value) & tt.eq(lam, 0)) + | (tt.eq(value, 0) & tt.eq(lam, np.inf)) ), 0, - a + tt.log1p(tt.exp(b - a)) - ) + a + tt.log1p(tt.exp(b - a)), + ), ) class Beta(UnitContinuous): - R""" + r""" Beta log-likelihood. The pdf of this distribution is @@ -1113,8 +1168,7 @@ class Beta(UnitContinuous): the binomial distribution. """ - def __init__(self, alpha=None, beta=None, mu=None, sd=None, - *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): super(Beta, self).__init__(*args, **kwargs) alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) @@ -1122,22 +1176,27 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, self.beta = beta = tt.as_tensor_variable(beta) self.mean = self.alpha / (self.alpha + self.beta) - self.variance = self.alpha * self.beta / ( - (self.alpha + self.beta)**2 * (self.alpha + self.beta + 1)) + self.variance = ( + self.alpha + * self.beta + / ((self.alpha + self.beta) ** 2 * (self.alpha + self.beta + 1)) + ) - assert_negative_support(alpha, 'alpha', 'Beta') - assert_negative_support(beta, 'beta', 'Beta') + assert_negative_support(alpha, "alpha", "Beta") + assert_negative_support(beta, "beta", "Beta") def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): pass elif (mu is not None) and (sd is not None): - kappa = mu * (1 - mu) / sd**2 - 1 + kappa = mu * (1 - mu) / sd ** 2 - 1 alpha = mu * kappa beta = (1 - mu) * kappa else: - raise ValueError('Incompatible parameterization. Either use alpha ' - 'and beta, or mu and sd to specify distribution.') + raise ValueError( + "Incompatible parameterization. Either use alpha " + "and beta, or mu and sd to specify distribution." + ) return alpha, beta @@ -1158,11 +1217,10 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], - point=point, size=size) - return generate_samples(stats.beta.rvs, alpha, beta, - dist_shape=self.shape, - size=size) + alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) + return generate_samples( + stats.beta.rvs, alpha, beta, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -1183,13 +1241,13 @@ def logp(self, value): logval = tt.log(value) log1pval = tt.log1p(-value) - logp = (tt.switch(tt.eq(alpha, 1), 0, (alpha - 1) * logval) - + tt.switch(tt.eq(beta, 1), 0, (beta - 1) * log1pval) - - betaln(alpha, beta)) + logp = ( + tt.switch(tt.eq(alpha, 1), 0, (alpha - 1) * logval) + + tt.switch(tt.eq(beta, 1), 0, (beta - 1) * log1pval) + - betaln(alpha, beta) + ) - return bound(logp, - value >= 0, value <= 1, - alpha > 0, beta > 0) + return bound(logp, value >= 0, value <= 1, alpha > 0, beta > 0) def logcdf(self, value): value = floatX(tt.as_tensor(value)) @@ -1198,11 +1256,7 @@ def logcdf(self, value): return tt.switch( tt.le(value, 0), -np.inf, - tt.switch( - tt.ge(value, 1), - 0, - tt.log(incomplete_beta(a, b, value)) - ) + tt.switch(tt.ge(value, 1), 0, tt.log(incomplete_beta(a, b, value))), ) def _repr_latex_(self, name=None, dist=None): @@ -1210,13 +1264,14 @@ def _repr_latex_(self, name=None, dist=None): dist = self alpha = dist.alpha beta = dist.beta - name = r'\text{%s}' % name - return r'${} \sim \text{{Beta}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name, - get_variable_name(alpha), - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Beta}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format( + name, get_variable_name(alpha), get_variable_name(beta) + ) + class Kumaraswamy(UnitContinuous): - R""" + r""" Kumaraswamy log-likelihood. The pdf of this distribution is @@ -1263,13 +1318,23 @@ def __init__(self, a, b, *args, **kwargs): self.a = a = tt.as_tensor_variable(a) self.b = b = tt.as_tensor_variable(b) - ln_mean = tt.log(b) + tt.gammaln(1 + 1 / a) + tt.gammaln(b) - tt.gammaln(1 + 1 / a + b) + ln_mean = ( + tt.log(b) + + tt.gammaln(1 + 1 / a) + + tt.gammaln(b) + - tt.gammaln(1 + 1 / a + b) + ) self.mean = tt.exp(ln_mean) - ln_2nd_raw_moment = tt.log(b) + tt.gammaln(1 + 2 / a) + tt.gammaln(b) - tt.gammaln(1 + 2 / a + b) + ln_2nd_raw_moment = ( + tt.log(b) + + tt.gammaln(1 + 2 / a) + + tt.gammaln(b) + - tt.gammaln(1 + 2 / a + b) + ) self.variance = tt.exp(ln_2nd_raw_moment) - self.mean ** 2 - assert_negative_support(a, 'a', 'Kumaraswamy') - assert_negative_support(b, 'b', 'Kumaraswamy') + assert_negative_support(a, "a", "Kumaraswamy") + assert_negative_support(b, "b", "Kumaraswamy") def _random(self, a, b, size=None): u = np.random.uniform(size=size) @@ -1292,11 +1357,8 @@ def random(self, point=None, size=None): ------- array """ - a, b = draw_values([self.a, self.b], - point=point, size=size) - return generate_samples(self._random, a, b, - dist_shape=self.shape, - size=size) + a, b = draw_values([self.a, self.b], point=point, size=size) + return generate_samples(self._random, a, b, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -1315,25 +1377,28 @@ def logp(self, value): a = self.a b = self.b - logp = tt.log(a) + tt.log(b) + (a - 1) * tt.log(value) + (b - 1) * tt.log(1 - value ** a) + logp = ( + tt.log(a) + + tt.log(b) + + (a - 1) * tt.log(value) + + (b - 1) * tt.log(1 - value ** a) + ) - return bound(logp, - value >= 0, value <= 1, - a > 0, b > 0) + return bound(logp, value >= 0, value <= 1, a > 0, b > 0) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self a = dist.a b = dist.b - name = r'\text{%s}' % name - return r'${} \sim \text{{Kumaraswamy}}(\mathit{{a}}={},~\mathit{{b}}={})$'.format(name, - get_variable_name(a), - get_variable_name(b)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Kumaraswamy}}(\mathit{{a}}={},~\mathit{{b}}={})$".format( + name, get_variable_name(a), get_variable_name(b) + ) class Exponential(PositiveContinuous): - R""" + r""" Exponential log-likelihood. The pdf of this distribution is @@ -1372,13 +1437,13 @@ class Exponential(PositiveContinuous): def __init__(self, lam, *args, **kwargs): super(Exponential, self).__init__(*args, **kwargs) self.lam = lam = tt.as_tensor_variable(lam) - self.mean = 1. / self.lam + self.mean = 1.0 / self.lam self.median = self.mean * tt.log(2) self.mode = tt.zeros_like(self.lam) - self.variance = self.lam**-2 + self.variance = self.lam ** -2 - assert_negative_support(lam, 'lam', 'Exponential') + assert_negative_support(lam, "lam", "Exponential") def random(self, point=None, size=None): """ @@ -1398,9 +1463,9 @@ def random(self, point=None, size=None): array """ lam = draw_values([self.lam], point=point, size=size)[0] - return generate_samples(np.random.exponential, scale=1. / lam, - dist_shape=self.shape, - size=size) + return generate_samples( + np.random.exponential, scale=1.0 / lam, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -1423,9 +1488,10 @@ def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self lam = dist.lam - name = r'\text{%s}' % name - return r'${} \sim \text{{Exponential}}(\mathit{{lam}}={})$'.format(name, - get_variable_name(lam)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Exponential}}(\mathit{{lam}}={})$".format( + name, get_variable_name(lam) + ) def logcdf(self, value): """ @@ -1444,15 +1510,13 @@ def logcdf(self, value): tt.le(value, 0.0), -np.inf, tt.switch( - tt.le(a, tt.log(2.0)), - tt.log(-tt.expm1(-a)), - tt.log1p(-tt.exp(-a)), - ) + tt.le(a, tt.log(2.0)), tt.log(-tt.expm1(-a)), tt.log1p(-tt.exp(-a)) + ), ) class Laplace(Continuous): - R""" + r""" Laplace log-likelihood. The pdf of this distribution is @@ -1498,9 +1562,9 @@ def __init__(self, mu, b, *args, **kwargs): self.b = b = tt.as_tensor_variable(b) self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu) - self.variance = 2 * self.b**2 + self.variance = 2 * self.b ** 2 - assert_negative_support(b, 'b', 'Laplace') + assert_negative_support(b, "b", "Laplace") def random(self, point=None, size=None): """ @@ -1520,9 +1584,9 @@ def random(self, point=None, size=None): array """ mu, b = draw_values([self.mu, self.b], point=point, size=size) - return generate_samples(np.random.laplace, mu, b, - dist_shape=self.shape, - size=size) + return generate_samples( + np.random.laplace, mu, b, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -1548,10 +1612,10 @@ def _repr_latex_(self, name=None, dist=None): dist = self b = dist.b mu = dist.mu - name = r'\text{%s}' % name - return r'${} \sim \text{{Laplace}}(\mathit{{mu}}={},~\mathit{{b}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(b)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Laplace}}(\mathit{{mu}}={},~\mathit{{b}}={})$".format( + name, get_variable_name(mu), get_variable_name(b) + ) def logcdf(self, value): a = self.mu @@ -1561,15 +1625,13 @@ def logcdf(self, value): tt.le(value, a), tt.log(0.5) + y, tt.switch( - tt.gt(y, 1), - tt.log1p(-0.5 * tt.exp(-y)), - tt.log(1 - 0.5 * tt.exp(-y)) - ) + tt.gt(y, 1), tt.log1p(-0.5 * tt.exp(-y)), tt.log(1 - 0.5 * tt.exp(-y)) + ), ) class Lognormal(PositiveContinuous): - R""" + r""" Log-normal log-likelihood. Distribution of any random variable whose logarithm is normally @@ -1637,17 +1699,19 @@ def __init__(self, mu=0, sd=None, tau=None, *args, **kwargs): self.tau = tau = tt.as_tensor_variable(tau) self.sd = sd = tt.as_tensor_variable(sd) - self.mean = tt.exp(self.mu + 1. / (2 * self.tau)) + self.mean = tt.exp(self.mu + 1.0 / (2 * self.tau)) self.median = tt.exp(self.mu) - self.mode = tt.exp(self.mu - 1. / self.tau) - self.variance = (tt.exp(1. / self.tau) - 1) * tt.exp(2 * self.mu + 1. / self.tau) + self.mode = tt.exp(self.mu - 1.0 / self.tau) + self.variance = (tt.exp(1.0 / self.tau) - 1) * tt.exp( + 2 * self.mu + 1.0 / self.tau + ) - assert_negative_support(tau, 'tau', 'Lognormal') - assert_negative_support(sd, 'sd', 'Lognormal') + assert_negative_support(tau, "tau", "Lognormal") + assert_negative_support(sd, "sd", "Lognormal") def _random(self, mu, tau, size=None): samples = np.random.normal(size=size) - return np.exp(mu + (tau**-0.5) * samples) + return np.exp(mu + (tau ** -0.5) * samples) def random(self, point=None, size=None): """ @@ -1667,9 +1731,7 @@ def random(self, point=None, size=None): array """ mu, tau = draw_values([self.mu, self.tau], point=point, size=size) - return generate_samples(self._random, mu, tau, - dist_shape=self.shape, - size=size) + return generate_samples(self._random, mu, tau, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -1687,20 +1749,22 @@ def logp(self, value): """ mu = self.mu tau = self.tau - return bound(-0.5 * tau * (tt.log(value) - mu)**2 - + 0.5 * tt.log(tau / (2. * np.pi)) - - tt.log(value), - tau > 0) + return bound( + -0.5 * tau * (tt.log(value) - mu) ** 2 + + 0.5 * tt.log(tau / (2.0 * np.pi)) + - tt.log(value), + tau > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self tau = dist.tau mu = dist.mu - name = r'\text{%s}' % name - return r'${} \sim \text{{Lognormal}}(\mathit{{mu}}={},~\mathit{{tau}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(tau)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Lognormal}}(\mathit{{mu}}={},~\mathit{{tau}}={})$".format( + name, get_variable_name(mu), get_variable_name(tau) + ) def logcdf(self, value): mu = self.mu @@ -1712,15 +1776,14 @@ def logcdf(self, value): -np.inf, tt.switch( tt.lt(z, -1.0), - tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - - tt.sqr(z) / 2, - tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.) - ) + tt.log(tt.erfcx(-z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2, + tt.log1p(-tt.erfc(z / tt.sqrt(2.0)) / 2.0), + ), ) class StudentT(Continuous): - R""" + r""" Student's T log-likelihood. Describes a normal variable whose precision is gamma distributed. @@ -1790,12 +1853,12 @@ def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): self.sd = sd = tt.as_tensor_variable(sd) self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu) - self.variance = tt.switch((nu > 2) * 1, - (1 / self.lam) * (nu / (nu - 2)), - np.inf) + self.variance = tt.switch( + (nu > 2) * 1, (1 / self.lam) * (nu / (nu - 2)), np.inf + ) - assert_negative_support(lam, 'lam (sd)', 'StudentT') - assert_negative_support(nu, 'nu', 'StudentT') + assert_negative_support(lam, "lam (sd)", "StudentT") + assert_negative_support(nu, "nu", "StudentT") def random(self, point=None, size=None): """ @@ -1814,11 +1877,10 @@ def random(self, point=None, size=None): ------- array """ - nu, mu, lam = draw_values([self.nu, self.mu, self.lam], - point=point, size=size) - return generate_samples(stats.t.rvs, nu, loc=mu, scale=lam**-0.5, - dist_shape=self.shape, - size=size) + nu, mu, lam = draw_values([self.nu, self.mu, self.lam], point=point, size=size) + return generate_samples( + stats.t.rvs, nu, loc=mu, scale=lam ** -0.5, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -1839,11 +1901,15 @@ def logp(self, value): lam = self.lam sd = self.sd - return bound(gammaln((nu + 1.0) / 2.0) - + .5 * tt.log(lam / (nu * np.pi)) - - gammaln(nu / 2.0) - - (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu)**2 / nu), - lam > 0, nu > 0, sd > 0) + return bound( + gammaln((nu + 1.0) / 2.0) + + 0.5 * tt.log(lam / (nu * np.pi)) + - gammaln(nu / 2.0) + - (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu) ** 2 / nu), + lam > 0, + nu > 0, + sd > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -1851,24 +1917,23 @@ def _repr_latex_(self, name=None, dist=None): nu = dist.nu mu = dist.mu lam = dist.lam - name = r'\text{%s}' % name - return r'${} \sim \text{{StudentT}}(\mathit{{nu}}={},~\mathit{{mu}}={},~\mathit{{lam}}={})$'.format(name, - get_variable_name(nu), - get_variable_name(mu), - get_variable_name(lam)) + name = r"\text{%s}" % name + return r"${} \sim \text{{StudentT}}(\mathit{{nu}}={},~\mathit{{mu}}={},~\mathit{{lam}}={})$".format( + name, get_variable_name(nu), get_variable_name(mu), get_variable_name(lam) + ) def logcdf(self, value): nu = self.nu mu = self.mu sd = self.sd - t = (value - mu)/sd - sqrt_t2_nu = tt.sqrt(t**2 + nu) - x = (t + sqrt_t2_nu)/(2.0 * sqrt_t2_nu) - return tt.log(incomplete_beta(nu/2., nu/2., x)) + t = (value - mu) / sd + sqrt_t2_nu = tt.sqrt(t ** 2 + nu) + x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu) + return tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x)) class Pareto(Continuous): - R""" + r""" Pareto log-likelihood. Often used to characterize wealth distribution, or other examples of the @@ -1912,28 +1977,28 @@ class Pareto(Continuous): Scale parameter (m > 0). """ - def __init__(self, alpha, m, transform='lowerbound', *args, **kwargs): + def __init__(self, alpha, m, transform="lowerbound", *args, **kwargs): self.alpha = alpha = tt.as_tensor_variable(alpha) self.m = m = tt.as_tensor_variable(m) - self.mean = tt.switch(tt.gt(alpha, 1), alpha * - m / (alpha - 1.), np.inf) - self.median = m * 2.**(1. / alpha) + self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.0), np.inf) + self.median = m * 2.0 ** (1.0 / alpha) self.variance = tt.switch( tt.gt(alpha, 2), - (alpha * m**2) / ((alpha - 2.) * (alpha - 1.)**2), - np.inf) + (alpha * m ** 2) / ((alpha - 2.0) * (alpha - 1.0) ** 2), + np.inf, + ) - assert_negative_support(alpha, 'alpha', 'Pareto') - assert_negative_support(m, 'm', 'Pareto') + assert_negative_support(alpha, "alpha", "Pareto") + assert_negative_support(m, "m", "Pareto") - if transform == 'lowerbound': + if transform == "lowerbound": transform = transforms.lowerbound(self.m) super(Pareto, self).__init__(transform=transform, *args, **kwargs) def _random(self, alpha, m, size=None): u = np.random.uniform(size=size) - return m * (1. - u)**(-1. / alpha) + return m * (1.0 - u) ** (-1.0 / alpha) def random(self, point=None, size=None): """ @@ -1952,11 +2017,10 @@ def random(self, point=None, size=None): ------- array """ - alpha, m = draw_values([self.alpha, self.m], - point=point, size=size) - return generate_samples(self._random, alpha, m, - dist_shape=self.shape, - size=size) + alpha, m = draw_values([self.alpha, self.m], point=point, size=size) + return generate_samples( + self._random, alpha, m, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -1974,19 +2038,22 @@ def logp(self, value): """ alpha = self.alpha m = self.m - return bound(tt.log(alpha) + logpow(m, alpha) - - logpow(value, alpha + 1), - value >= m, alpha > 0, m > 0) + return bound( + tt.log(alpha) + logpow(m, alpha) - logpow(value, alpha + 1), + value >= m, + alpha > 0, + m > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self alpha = dist.alpha m = dist.m - name = r'\text{%s}' % name - return r'${} \sim \text{{Pareto}}(\mathit{{alpha}}={},~\mathit{{m}}={})$'.format(name, - get_variable_name(alpha), - get_variable_name(m)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Pareto}}(\mathit{{alpha}}={},~\mathit{{m}}={})$".format( + name, get_variable_name(alpha), get_variable_name(m) + ) def logcdf(self, value): m = self.m @@ -1995,16 +2062,12 @@ def logcdf(self, value): return tt.switch( tt.lt(value, m), -np.inf, - tt.switch( - tt.le(arg, 1e-5), - tt.log1p(-arg), - tt.log(1 - arg) - ) + tt.switch(tt.le(arg, 1e-5), tt.log1p(-arg), tt.log(1 - arg)), ) class Cauchy(Continuous): - R""" + r""" Cauchy log-likelihood. Also known as the Lorentz or the Breit-Wigner distribution. @@ -2053,7 +2116,7 @@ def __init__(self, alpha, beta, *args, **kwargs): self.median = self.mode = self.alpha = tt.as_tensor_variable(alpha) self.beta = tt.as_tensor_variable(beta) - assert_negative_support(beta, 'beta', 'Cauchy') + assert_negative_support(beta, "beta", "Cauchy") def _random(self, alpha, beta, size=None): u = np.random.uniform(size=size) @@ -2076,11 +2139,10 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], - point=point, size=size) - return generate_samples(self._random, alpha, beta, - dist_shape=self.shape, - size=size) + alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) + return generate_samples( + self._random, alpha, beta, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -2098,28 +2160,27 @@ def logp(self, value): """ alpha = self.alpha beta = self.beta - return bound(- tt.log(np.pi) - tt.log(beta) - - tt.log1p(((value - alpha) / beta)**2), - beta > 0) + return bound( + -tt.log(np.pi) - tt.log(beta) - tt.log1p(((value - alpha) / beta) ** 2), + beta > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self alpha = dist.alpha beta = dist.beta - name = r'\text{%s}' % name - return r'${} \sim \text{{Cauchy}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name, - get_variable_name(alpha), - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Cauchy}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format( + name, get_variable_name(alpha), get_variable_name(beta) + ) def logcdf(self, value): - return tt.log( - 0.5 + tt.arctan((value - self.alpha) / self.beta) / np.pi - ) + return tt.log(0.5 + tt.arctan((value - self.alpha) / self.beta) / np.pi) class HalfCauchy(PositiveContinuous): - R""" + r""" Half-Cauchy log-likelihood. The pdf of this distribution is @@ -2162,7 +2223,7 @@ def __init__(self, beta, *args, **kwargs): self.median = tt.as_tensor_variable(beta) self.beta = tt.as_tensor_variable(beta) - assert_negative_support(beta, 'beta', 'HalfCauchy') + assert_negative_support(beta, "beta", "HalfCauchy") def _random(self, beta, size=None): u = np.random.uniform(size=size) @@ -2186,9 +2247,7 @@ def random(self, point=None, size=None): array """ beta = draw_values([self.beta], point=point, size=size)[0] - return generate_samples(self._random, beta, - dist_shape=self.shape, - size=size) + return generate_samples(self._random, beta, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -2205,29 +2264,29 @@ def logp(self, value): TensorVariable """ beta = self.beta - return bound(tt.log(2) - tt.log(np.pi) - tt.log(beta) - - tt.log1p((value / beta)**2), - value >= 0, beta > 0) + return bound( + tt.log(2) - tt.log(np.pi) - tt.log(beta) - tt.log1p((value / beta) ** 2), + value >= 0, + beta > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self beta = dist.beta - name = r'\text{%s}' % name - return r'${} \sim \text{{HalfCauchy}}(\mathit{{beta}}={})$'.format(name, - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{HalfCauchy}}(\mathit{{beta}}={})$".format( + name, get_variable_name(beta) + ) def logcdf(self, value): return tt.switch( - tt.le(value, 0), - -np.inf, - tt.log( - 2 * tt.arctan(value / self.beta) / np.pi - )) + tt.le(value, 0), -np.inf, tt.log(2 * tt.arctan(value / self.beta) / np.pi) + ) class Gamma(PositiveContinuous): - R""" + r""" Gamma log-likelihood. Represents the sum of alpha exponentially distributed random variables, @@ -2284,29 +2343,30 @@ class Gamma(PositiveContinuous): Alternative scale parameter (sd > 0). """ - def __init__(self, alpha=None, beta=None, mu=None, sd=None, - *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): super(Gamma, self).__init__(*args, **kwargs) alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha = tt.as_tensor_variable(alpha) self.beta = beta = tt.as_tensor_variable(beta) self.mean = alpha / beta self.mode = tt.maximum((alpha - 1) / beta, 0) - self.variance = alpha / beta**2 + self.variance = alpha / beta ** 2 - assert_negative_support(alpha, 'alpha', 'Gamma') - assert_negative_support(beta, 'beta', 'Gamma') + assert_negative_support(alpha, "alpha", "Gamma") + assert_negative_support(beta, "beta", "Gamma") def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): pass elif (mu is not None) and (sd is not None): - alpha = mu**2 / sd**2 - beta = mu / sd**2 + alpha = mu ** 2 / sd ** 2 + beta = mu / sd ** 2 else: - raise ValueError('Incompatible parameterization. Either use ' - 'alpha and beta, or mu and sd to specify ' - 'distribution.') + raise ValueError( + "Incompatible parameterization. Either use " + "alpha and beta, or mu and sd to specify " + "distribution." + ) return alpha, beta @@ -2327,11 +2387,10 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], - point=point, size=size) - return generate_samples(stats.gamma.rvs, alpha, scale=1. / beta, - dist_shape=self.shape, - size=size) + alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) + return generate_samples( + stats.gamma.rvs, alpha, scale=1.0 / beta, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -2350,25 +2409,28 @@ def logp(self, value): alpha = self.alpha beta = self.beta return bound( - -gammaln(alpha) + logpow( - beta, alpha) - beta * value + logpow(value, alpha - 1), + -gammaln(alpha) + + logpow(beta, alpha) + - beta * value + + logpow(value, alpha - 1), value >= 0, alpha > 0, - beta > 0) + beta > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self beta = dist.beta alpha = dist.alpha - name = r'\text{%s}' % name - return r'${} \sim \text{{Gamma}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name, - get_variable_name(alpha), - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Gamma}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format( + name, get_variable_name(alpha), get_variable_name(beta) + ) class InverseGamma(PositiveContinuous): - R""" + r""" Inverse gamma log-likelihood, the reciprocal of the gamma distribution. The pdf of this distribution is @@ -2416,22 +2478,22 @@ class InverseGamma(PositiveContinuous): """ def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): - super(InverseGamma, self).__init__(*args, defaults=('mode',), **kwargs) + super(InverseGamma, self).__init__(*args, defaults=("mode",), **kwargs) alpha, beta = InverseGamma._get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha = tt.as_tensor_variable(alpha) self.beta = beta = tt.as_tensor_variable(beta) self.mean = self._calculate_mean() - self.mode = beta / (alpha + 1.) - self.variance = tt.switch(tt.gt(alpha, 2), - (beta**2) / ((alpha - 2) * (alpha - 1.)**2), - np.inf) - assert_negative_support(alpha, 'alpha', 'InverseGamma') - assert_negative_support(beta, 'beta', 'InverseGamma') + self.mode = beta / (alpha + 1.0) + self.variance = tt.switch( + tt.gt(alpha, 2), (beta ** 2) / ((alpha - 2) * (alpha - 1.0) ** 2), np.inf + ) + assert_negative_support(alpha, "alpha", "InverseGamma") + assert_negative_support(beta, "beta", "InverseGamma") def _calculate_mean(self): - m = self.beta / (self.alpha - 1.) + m = self.beta / (self.alpha - 1.0) try: return (self.alpha > 1) * m or np.inf except ValueError: # alpha is an array @@ -2440,18 +2502,20 @@ def _calculate_mean(self): @staticmethod def _get_alpha_beta(alpha, beta, mu, sd): - if (alpha is not None): - if (beta is not None): + if alpha is not None: + if beta is not None: pass else: beta = 1 elif (mu is not None) and (sd is not None): - alpha = (2 * sd**2 + mu**2)/sd**2 - beta = mu * (mu**2 + sd**2) / sd**2 + alpha = (2 * sd ** 2 + mu ** 2) / sd ** 2 + beta = mu * (mu ** 2 + sd ** 2) / sd ** 2 else: - raise ValueError('Incompatible parameterization. Either use ' - 'alpha and (optionally) beta, or mu and sd to specify ' - 'distribution.') + raise ValueError( + "Incompatible parameterization. Either use " + "alpha and (optionally) beta, or mu and sd to specify " + "distribution." + ) return alpha, beta @@ -2472,11 +2536,10 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], - point=point, size=size) - return generate_samples(stats.invgamma.rvs, a=alpha, scale=beta, - dist_shape=self.shape, - size=size) + alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) + return generate_samples( + stats.invgamma.rvs, a=alpha, scale=beta, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -2494,23 +2557,29 @@ def logp(self, value): """ alpha = self.alpha beta = self.beta - return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value - + logpow(value, -alpha - 1), - value > 0, alpha > 0, beta > 0) + return bound( + logpow(beta, alpha) + - gammaln(alpha) + - beta / value + + logpow(value, -alpha - 1), + value > 0, + alpha > 0, + beta > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self beta = dist.beta alpha = dist.alpha - name = r'\text{%s}' % name - return r'${} \sim \text{{InverseGamma}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name, - get_variable_name(alpha), - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{InverseGamma}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format( + name, get_variable_name(alpha), get_variable_name(beta) + ) class ChiSquared(Gamma): - R""" + r""" :math:`\chi^2` log-likelihood. The pdf of this distribution is @@ -2549,20 +2618,18 @@ class ChiSquared(Gamma): def __init__(self, nu, *args, **kwargs): self.nu = nu = tt.as_tensor_variable(nu) - super(ChiSquared, self).__init__(alpha=nu / 2., beta=0.5, - *args, **kwargs) + super(ChiSquared, self).__init__(alpha=nu / 2.0, beta=0.5, *args, **kwargs) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self nu = dist.nu - name = r'\text{%s}' % name - return r'${} \sim \Chi^2(\mathit{{nu}}={})$'.format(name, - get_variable_name(nu)) + name = r"\text{%s}" % name + return r"${} \sim \Chi^2(\mathit{{nu}}={})$".format(name, get_variable_name(nu)) class Weibull(PositiveContinuous): - R""" + r""" Weibull log-likelihood. The pdf of this distribution is @@ -2609,16 +2676,15 @@ def __init__(self, alpha, beta, *args, **kwargs): super(Weibull, self).__init__(*args, **kwargs) self.alpha = alpha = tt.as_tensor_variable(alpha) self.beta = beta = tt.as_tensor_variable(beta) - self.mean = beta * tt.exp(gammaln(1 + 1. / alpha)) - self.median = beta * tt.exp(gammaln(tt.log(2)))**(1. / alpha) - self.variance = (beta**2) * \ - tt.exp(gammaln(1 + 2. / alpha - self.mean**2)) - self.mode = tt.switch(alpha >= 1, - beta * ((alpha - 1)/alpha) ** (1 / alpha), - 0) # Reference: https://en.wikipedia.org/wiki/Weibull_distribution + self.mean = beta * tt.exp(gammaln(1 + 1.0 / alpha)) + self.median = beta * tt.exp(gammaln(tt.log(2))) ** (1.0 / alpha) + self.variance = (beta ** 2) * tt.exp(gammaln(1 + 2.0 / alpha - self.mean ** 2)) + self.mode = tt.switch( + alpha >= 1, beta * ((alpha - 1) / alpha) ** (1 / alpha), 0 + ) # Reference: https://en.wikipedia.org/wiki/Weibull_distribution - assert_negative_support(alpha, 'alpha', 'Weibull') - assert_negative_support(beta, 'beta', 'Weibull') + assert_negative_support(alpha, "alpha", "Weibull") + assert_negative_support(beta, "beta", "Weibull") def random(self, point=None, size=None): """ @@ -2637,15 +2703,12 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], - point=point, size=size) + alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) def _random(a, b, size=None): - return b * (-np.log(np.random.uniform(size=size)))**(1 / a) + return b * (-np.log(np.random.uniform(size=size))) ** (1 / a) - return generate_samples(_random, alpha, beta, - dist_shape=self.shape, - size=size) + return generate_samples(_random, alpha, beta, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -2663,23 +2726,28 @@ def logp(self, value): """ alpha = self.alpha beta = self.beta - return bound(tt.log(alpha) - tt.log(beta) - + (alpha - 1) * tt.log(value / beta) - - (value / beta)**alpha, - value >= 0, alpha > 0, beta > 0) + return bound( + tt.log(alpha) + - tt.log(beta) + + (alpha - 1) * tt.log(value / beta) + - (value / beta) ** alpha, + value >= 0, + alpha > 0, + beta > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self beta = dist.beta alpha = dist.alpha - name = r'\text{%s}' % name - return r'${} \sim \text{{Weibull}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name, - get_variable_name(alpha), - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Weibull}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format( + name, get_variable_name(alpha), get_variable_name(beta) + ) def logcdf(self, value): - ''' + """ Compute the log CDF for the Weibull distribution References @@ -2687,22 +2755,21 @@ def logcdf(self, value): .. [Machler2012] Martin Mächler (2012). "Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr package" - ''' + """ alpha = self.alpha beta = self.beta - a = (value / beta)**alpha + a = (value / beta) ** alpha return tt.switch( tt.le(value, 0.0), -np.inf, tt.switch( - tt.le(a, tt.log(2.0)), - tt.log(-tt.expm1(-a)), - tt.log1p(-tt.exp(-a))) + tt.le(a, tt.log(2.0)), tt.log(-tt.expm1(-a)), tt.log1p(-tt.exp(-a)) + ), ) class HalfStudentT(PositiveContinuous): - R""" + r""" Half Student's T log-likelihood The pdf of this distribution is @@ -2767,9 +2834,9 @@ def __init__(self, nu=1, sd=None, lam=None, *args, **kwargs): self.lam = tt.as_tensor_variable(lam) self.nu = nu = tt.as_tensor_variable(nu) - assert_negative_support(sd, 'sd', 'HalfStudentT') - assert_negative_support(lam, 'lam', 'HalfStudentT') - assert_negative_support(nu, 'nu', 'HalfStudentT') + assert_negative_support(sd, "sd", "HalfStudentT") + assert_negative_support(lam, "lam", "HalfStudentT") + assert_negative_support(nu, "nu", "HalfStudentT") def random(self, point=None, size=None): """ @@ -2789,9 +2856,11 @@ def random(self, point=None, size=None): array """ nu, sd = draw_values([self.nu, self.sd], point=point, size=size) - return np.abs(generate_samples(stats.t.rvs, nu, loc=0, scale=sd, - dist_shape=self.shape, - size=size)) + return np.abs( + generate_samples( + stats.t.rvs, nu, loc=0, scale=sd, dist_shape=self.shape, size=size + ) + ) def logp(self, value): """ @@ -2811,25 +2880,31 @@ def logp(self, value): sd = self.sd lam = self.lam - return bound(tt.log(2) + gammaln((nu + 1.0) / 2.0) - - gammaln(nu / 2.0) - - .5 * tt.log(nu * np.pi * sd**2) - - (nu + 1.0) / 2.0 * tt.log1p(value ** 2 / (nu * sd**2)), - sd > 0, lam > 0, nu > 0, value >= 0) + return bound( + tt.log(2) + + gammaln((nu + 1.0) / 2.0) + - gammaln(nu / 2.0) + - 0.5 * tt.log(nu * np.pi * sd ** 2) + - (nu + 1.0) / 2.0 * tt.log1p(value ** 2 / (nu * sd ** 2)), + sd > 0, + lam > 0, + nu > 0, + value >= 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self nu = dist.nu sd = dist.sd - name = r'\text{%s}' % name - return r'${} \sim \text{{HalfStudentT}}(\mathit{{nu}}={},~\mathit{{sd}}={})$'.format(name, - get_variable_name(nu), - get_variable_name(sd)) + name = r"\text{%s}" % name + return r"${} \sim \text{{HalfStudentT}}(\mathit{{nu}}={},~\mathit{{sd}}={})$".format( + name, get_variable_name(nu), get_variable_name(sd) + ) class ExGaussian(Continuous): - R""" + r""" Exponentially modified Gaussian log-likelihood. Results from the convolution of a normal distribution with an exponential @@ -2899,10 +2974,10 @@ def __init__(self, mu, sigma, nu, *args, **kwargs): self.sigma = sigma = tt.as_tensor_variable(sigma) self.nu = nu = tt.as_tensor_variable(nu) self.mean = mu + nu - self.variance = (sigma**2) + (nu**2) + self.variance = (sigma ** 2) + (nu ** 2) - assert_negative_support(sigma, 'sigma', 'ExGaussian') - assert_negative_support(nu, 'nu', 'ExGaussian') + assert_negative_support(sigma, "sigma", "ExGaussian") + assert_negative_support(nu, "nu", "ExGaussian") def random(self, point=None, size=None): """ @@ -2921,16 +2996,18 @@ def random(self, point=None, size=None): ------- array """ - mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], - point=point, size=size) + mu, sigma, nu = draw_values( + [self.mu, self.sigma, self.nu], point=point, size=size + ) def _random(mu, sigma, nu, size=None): - return (np.random.normal(mu, sigma, size=size) - + np.random.exponential(scale=nu, size=size)) + return np.random.normal(mu, sigma, size=size) + np.random.exponential( + scale=nu, size=size + ) - return generate_samples(_random, mu, sigma, nu, - dist_shape=self.shape, - size=size) + return generate_samples( + _random, mu, sigma, nu, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -2951,12 +3028,15 @@ def logp(self, value): nu = self.nu # This condition suggested by exGAUS.R from gamlss - lp = tt.switch(tt.gt(nu, 0.05 * sigma), - - tt.log(nu) + (mu - value) / nu + 0.5 * (sigma / nu)**2 - + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.), - - tt.log(sigma * tt.sqrt(2 * np.pi)) - - 0.5 * ((value - mu) / sigma)**2) - return bound(lp, sigma > 0., nu > 0.) + lp = tt.switch( + tt.gt(nu, 0.05 * sigma), + -tt.log(nu) + + (mu - value) / nu + + 0.5 * (sigma / nu) ** 2 + + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.0), + -tt.log(sigma * tt.sqrt(2 * np.pi)) - 0.5 * ((value - mu) / sigma) ** 2, + ) + return bound(lp, sigma > 0.0, nu > 0.0) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -2964,11 +3044,10 @@ def _repr_latex_(self, name=None, dist=None): sigma = dist.sigma mu = dist.mu nu = dist.nu - name = r'\text{%s}' % name - return r'${} \sim \text{{ExGaussian}}(\mathit{{mu}}={},~\mathit{{sigma}}={},~\mathit{{nu}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(sigma), - get_variable_name(nu)) + name = r"\text{%s}" % name + return r"${} \sim \text{{ExGaussian}}(\mathit{{mu}}={},~\mathit{{sigma}}={},~\mathit{{nu}}={})$".format( + name, get_variable_name(mu), get_variable_name(sigma), get_variable_name(nu) + ) def logcdf(self, value): """ @@ -2982,21 +3061,29 @@ def logcdf(self, value): """ mu = self.mu sigma = self.sigma - sigma_2 = sigma**2 + sigma_2 = sigma ** 2 nu = self.nu - z = value - mu - sigma_2/nu + z = value - mu - sigma_2 / nu return tt.switch( tt.gt(nu, 0.05 * sigma), - tt.log(std_cdf((value - mu)/sigma) - - std_cdf(z/sigma) * tt.exp( - ((mu + (sigma_2/nu))**2 - - (mu**2) - - 2 * value * ((sigma_2)/nu))/(2 * sigma_2))), - normal_lcdf(mu, sigma, value)) + tt.log( + std_cdf((value - mu) / sigma) + - std_cdf(z / sigma) + * tt.exp( + ( + (mu + (sigma_2 / nu)) ** 2 + - (mu ** 2) + - 2 * value * ((sigma_2) / nu) + ) + / (2 * sigma_2) + ) + ), + normal_lcdf(mu, sigma, value), + ) class VonMises(Continuous): - R""" + r""" Univariate VonMises log-likelihood. The pdf of this distribution is @@ -3039,15 +3126,14 @@ class VonMises(Continuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform='circular', - *args, **kwargs): - if transform == 'circular': + def __init__(self, mu=0.0, kappa=None, transform="circular", *args, **kwargs): + if transform == "circular": transform = transforms.Circular() super(VonMises, self).__init__(transform=transform, *args, **kwargs) self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu) self.kappa = kappa = floatX(tt.as_tensor_variable(kappa)) - assert_negative_support(kappa, 'kappa', 'VonMises') + assert_negative_support(kappa, "kappa", "VonMises") def random(self, point=None, size=None): """ @@ -3066,11 +3152,10 @@ def random(self, point=None, size=None): ------- array """ - mu, kappa = draw_values([self.mu, self.kappa], - point=point, size=size) - return generate_samples(stats.vonmises.rvs, loc=mu, kappa=kappa, - dist_shape=self.shape, - size=size) + mu, kappa = draw_values([self.mu, self.kappa], point=point, size=size) + return generate_samples( + stats.vonmises.rvs, loc=mu, kappa=kappa, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -3088,23 +3173,26 @@ def logp(self, value): """ mu = self.mu kappa = self.kappa - return bound(kappa * tt.cos(mu - value) - (tt.log(2 * np.pi) + log_i0(kappa)), - kappa > 0, value >= -np.pi, value <= np.pi) + return bound( + kappa * tt.cos(mu - value) - (tt.log(2 * np.pi) + log_i0(kappa)), + kappa > 0, + value >= -np.pi, + value <= np.pi, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self kappa = dist.kappa mu = dist.mu - name = r'\text{%s}' % name - return r'${} \sim \text{{VonMises}}(\mathit{{mu}}={},~\mathit{{kappa}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(kappa)) - + name = r"\text{%s}" % name + return r"${} \sim \text{{VonMises}}(\mathit{{mu}}={},~\mathit{{kappa}}={})$".format( + name, get_variable_name(mu), get_variable_name(kappa) + ) class SkewNormal(Continuous): - R""" + r""" Univariate skew-normal log-likelihood. The pdf of this distribution is @@ -3170,11 +3258,13 @@ def __init__(self, mu=0.0, sd=None, tau=None, alpha=1, *args, **kwargs): self.alpha = alpha = tt.as_tensor_variable(alpha) - self.mean = mu + self.sd * (2 / np.pi)**0.5 * alpha / (1 + alpha**2)**0.5 - self.variance = self.sd**2 * (1 - (2 * alpha**2) / ((1 + alpha**2) * np.pi)) + self.mean = mu + self.sd * (2 / np.pi) ** 0.5 * alpha / (1 + alpha ** 2) ** 0.5 + self.variance = self.sd ** 2 * ( + 1 - (2 * alpha ** 2) / ((1 + alpha ** 2) * np.pi) + ) - assert_negative_support(tau, 'tau', 'SkewNormal') - assert_negative_support(sd, 'sd', 'SkewNormal') + assert_negative_support(tau, "tau", "SkewNormal") + assert_negative_support(sd, "sd", "SkewNormal") def random(self, point=None, size=None): """ @@ -3194,11 +3284,16 @@ def random(self, point=None, size=None): array """ mu, tau, _, alpha = draw_values( - [self.mu, self.tau, self.sd, self.alpha], point=point, size=size) - return generate_samples(stats.skewnorm.rvs, - a=alpha, loc=mu, scale=tau**-0.5, - dist_shape=self.shape, - size=size) + [self.mu, self.tau, self.sd, self.alpha], point=point, size=size + ) + return generate_samples( + stats.skewnorm.rvs, + a=alpha, + loc=mu, + scale=tau ** -0.5, + dist_shape=self.shape, + size=size, + ) def logp(self, value): """ @@ -3219,11 +3314,11 @@ def logp(self, value): mu = self.mu alpha = self.alpha return bound( - tt.log(1 + - tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) - + (-tau * (value - mu)**2 - + tt.log(tau / np.pi / 2.)) / 2., - tau > 0, sd > 0) + tt.log(1 + tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) + + (-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0, + tau > 0, + sd > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -3231,15 +3326,14 @@ def _repr_latex_(self, name=None, dist=None): sd = dist.sd mu = dist.mu alpha = dist.alpha - name = r'\text{%s}' % name - return r'${} \sim \text{{Skew-Normal}}(\mathit{{mu}}={},~\mathit{{sd}}={},~\mathit{{alpha}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(sd), - get_variable_name(alpha)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Skew-Normal}}(\mathit{{mu}}={},~\mathit{{sd}}={},~\mathit{{alpha}}={})$".format( + name, get_variable_name(mu), get_variable_name(sd), get_variable_name(alpha) + ) class Triangular(BoundedContinuous): - R""" + r""" Continuous Triangular log-likelihood The pdf of this distribution is @@ -3290,14 +3384,12 @@ class Triangular(BoundedContinuous): Upper limit. """ - def __init__(self, lower=0, upper=1, c=0.5, - *args, **kwargs): + def __init__(self, lower=0, upper=1, c=0.5, *args, **kwargs): self.median = self.mean = self.c = c = tt.as_tensor_variable(c) self.lower = lower = tt.as_tensor_variable(lower) self.upper = upper = tt.as_tensor_variable(upper) - super(Triangular, self).__init__(lower=lower, upper=upper, - *args, **kwargs) + super(Triangular, self).__init__(lower=lower, upper=upper, *args, **kwargs) def random(self, point=None, size=None): """ @@ -3316,10 +3408,18 @@ def random(self, point=None, size=None): ------- array """ - c, lower, upper = draw_values([self.c, self.lower, self.upper], - point=point, size=size) - return generate_samples(stats.triang.rvs, c=c-lower, loc=lower, scale=upper-lower, - size=size, dist_shape=self.shape, random_state=None) + c, lower, upper = draw_values( + [self.c, self.lower, self.upper], point=point, size=size + ) + return generate_samples( + stats.triang.rvs, + c=c - lower, + loc=lower, + scale=upper - lower, + size=size, + dist_shape=self.shape, + random_state=None, + ) def logp(self, value): """ @@ -3338,13 +3438,19 @@ def logp(self, value): c = self.c lower = self.lower upper = self.upper - return tt.switch(alltrue_elemwise([lower <= value, value < c]), - tt.log(2 * (value - lower) / ((upper - lower) * (c - lower))), - tt.switch(tt.eq(value, c), - tt.log(2 / (upper - lower)), - tt.switch(alltrue_elemwise([c < value, value <= upper]), - tt.log(2 * (upper - value) / ((upper - lower) * (upper - c))), - np.inf))) + return tt.switch( + alltrue_elemwise([lower <= value, value < c]), + tt.log(2 * (value - lower) / ((upper - lower) * (c - lower))), + tt.switch( + tt.eq(value, c), + tt.log(2 / (upper - lower)), + tt.switch( + alltrue_elemwise([c < value, value <= upper]), + tt.log(2 * (upper - value) / ((upper - lower) * (upper - c))), + np.inf, + ), + ), + ) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -3352,11 +3458,13 @@ def _repr_latex_(self, name=None, dist=None): lower = dist.lower upper = dist.upper c = dist.c - name = r'\text{%s}' % name - return r'${} \sim \text{{Triangular}}(\mathit{{c}}={},~\mathit{{lower}}={},~\mathit{{upper}}={})$'.format(name, - get_variable_name(c), - get_variable_name(lower), - get_variable_name(upper)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Triangular}}(\mathit{{c}}={},~\mathit{{lower}}={},~\mathit{{upper}}={})$".format( + name, + get_variable_name(c), + get_variable_name(lower), + get_variable_name(upper), + ) def logcdf(self, value): l = self.lower @@ -3371,14 +3479,14 @@ def logcdf(self, value): tt.switch( tt.lt(value, u), tt.log1p(-((u - value) ** 2) / ((u - l) * (u - c))), - 0 - ) - ) + 0, + ), + ), ) class Gumbel(Continuous): - R""" + r""" Univariate Gumbel log-likelihood The pdf of this distribution is @@ -3423,7 +3531,7 @@ def __init__(self, mu=0, beta=1.0, **kwargs): self.mu = tt.as_tensor_variable(mu) self.beta = tt.as_tensor_variable(beta) - assert_negative_support(beta, 'beta', 'Gumbel') + assert_negative_support(beta, "beta", "Gumbel") self.mean = self.mu + self.beta * np.euler_gamma self.median = self.mu - self.beta * tt.log(tt.log(2)) @@ -3450,9 +3558,9 @@ def random(self, point=None, size=None): array """ mu, sd = draw_values([self.mu, self.beta], point=point, size=size) - return generate_samples(stats.gumbel_r.rvs, loc=mu, scale=sd, - dist_shape=self.shape, - size=size) + return generate_samples( + stats.gumbel_r.rvs, loc=mu, scale=sd, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -3476,20 +3584,20 @@ def _repr_latex_(self, name=None, dist=None): dist = self beta = dist.beta mu = dist.mu - name = r'\text{%s}' % name - return r'${} \sim \text{{Gumbel}}(\mathit{{mu}}={},~\mathit{{beta}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Gumbel}}(\mathit{{mu}}={},~\mathit{{beta}}={})$".format( + name, get_variable_name(mu), get_variable_name(beta) + ) def logcdf(self, value): beta = self.beta mu = self.mu - return -tt.exp(-(value - mu)/beta) + return -tt.exp(-(value - mu) / beta) class Rice(PositiveContinuous): - R""" + r""" Rice distribution. .. math:: @@ -3519,10 +3627,29 @@ def __init__(self, nu=None, sd=None, *args, **kwargs): super(Rice, self).__init__(*args, **kwargs) self.nu = nu = tt.as_tensor_variable(nu) self.sd = sd = tt.as_tensor_variable(sd) - self.mean = sd * np.sqrt(np.pi / 2) * tt.exp((-nu**2 / (2 * sd**2)) / 2) * ((1 - (-nu**2 / (2 * sd**2))) - * i0(-(-nu**2 / (2 * sd**2)) / 2) - (-nu**2 / (2 * sd**2)) * i1(-(-nu**2 / (2 * sd**2)) / 2)) - self.variance = 2 * sd**2 + nu**2 - (np.pi * sd**2 / 2) * (tt.exp((-nu**2 / (2 * sd**2)) / 2) * ((1 - (-nu**2 / ( - 2 * sd**2))) * i0(-(-nu**2 / (2 * sd**2)) / 2) - (-nu**2 / (2 * sd**2)) * i1(-(-nu**2 / (2 * sd**2)) / 2)))**2 + self.mean = ( + sd + * np.sqrt(np.pi / 2) + * tt.exp((-nu ** 2 / (2 * sd ** 2)) / 2) + * ( + (1 - (-nu ** 2 / (2 * sd ** 2))) * i0(-(-nu ** 2 / (2 * sd ** 2)) / 2) + - (-nu ** 2 / (2 * sd ** 2)) * i1(-(-nu ** 2 / (2 * sd ** 2)) / 2) + ) + ) + self.variance = ( + 2 * sd ** 2 + + nu ** 2 + - (np.pi * sd ** 2 / 2) + * ( + tt.exp((-nu ** 2 / (2 * sd ** 2)) / 2) + * ( + (1 - (-nu ** 2 / (2 * sd ** 2))) + * i0(-(-nu ** 2 / (2 * sd ** 2)) / 2) + - (-nu ** 2 / (2 * sd ** 2)) * i1(-(-nu ** 2 / (2 * sd ** 2)) / 2) + ) + ) + ** 2 + ) def random(self, point=None, size=None): """ @@ -3541,10 +3668,10 @@ def random(self, point=None, size=None): ------- array """ - nu, sd = draw_values([self.nu, self.sd], - point=point, size=size) - return generate_samples(stats.rice.rvs, b=nu, scale=sd, loc=0, - dist_shape=self.shape, size=size) + nu, sd = draw_values([self.nu, self.sd], point=point, size=size) + return generate_samples( + stats.rice.rvs, b=nu, scale=sd, loc=0, dist_shape=self.shape, size=size + ) def logp(self, value): """ @@ -3563,15 +3690,16 @@ def logp(self, value): nu = self.nu sd = self.sd x = value / sd - return bound(tt.log(x * tt.exp((-(x - nu) * (x - nu)) / 2) * i0e(x * nu) / sd), - sd >= 0, - nu >= 0, - value > 0, - ) + return bound( + tt.log(x * tt.exp((-(x - nu) * (x - nu)) / 2) * i0e(x * nu) / sd), + sd >= 0, + nu >= 0, + value > 0, + ) class Logistic(Continuous): - R""" + r""" Logistic log-likelihood. The pdf of this distribution is @@ -3613,14 +3741,14 @@ class Logistic(Continuous): Scale (s > 0). """ - def __init__(self, mu=0., s=1., *args, **kwargs): + def __init__(self, mu=0.0, s=1.0, *args, **kwargs): super(Logistic, self).__init__(*args, **kwargs) self.mu = tt.as_tensor_variable(mu) self.s = tt.as_tensor_variable(s) self.mean = self.mode = mu - self.variance = s**2 * np.pi**2 / 3. + self.variance = s ** 2 * np.pi ** 2 / 3.0 def logp(self, value): """ @@ -3640,7 +3768,9 @@ def logp(self, value): s = self.s return bound( - -(value - mu) / s - tt.log(s) - 2 * tt.log1p(tt.exp(-(value - mu) / s)), s > 0) + -(value - mu) / s - tt.log(s) - 2 * tt.log1p(tt.exp(-(value - mu) / s)), + s > 0, + ) def random(self, point=None, size=None): """ @@ -3662,20 +3792,18 @@ def random(self, point=None, size=None): mu, s = draw_values([self.mu, self.s], point=point, size=size) return generate_samples( - stats.logistic.rvs, - loc=mu, scale=s, - dist_shape=self.shape, - size=size) + stats.logistic.rvs, loc=mu, scale=s, dist_shape=self.shape, size=size + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self mu = dist.mu s = dist.s - name = r'\text{%s}' % name - return r'${} \sim \text{{Logistic}}(\mathit{{mu}}={},~\mathit{{s}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(s)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Logistic}}(\mathit{{mu}}={},~\mathit{{s}}={})$".format( + name, get_variable_name(mu), get_variable_name(s) + ) def logcdf(self, value): """ @@ -3689,21 +3817,20 @@ def logcdf(self, value): """ mu = self.mu s = self.s - a = -(value - mu)/s - return - tt.switch( + a = -(value - mu) / s + return -tt.switch( tt.le(a, -37), tt.exp(a), tt.switch( tt.le(a, 18), tt.log1p(tt.exp(a)), - tt.switch( - tt.le(a, 33.3), - tt.exp(-a) + a, - a))) + tt.switch(tt.le(a, 33.3), tt.exp(-a) + a, a), + ), + ) class LogitNormal(UnitContinuous): - R""" + r""" Logit-Normal log-likelihood. The pdf of this distribution is @@ -3753,8 +3880,8 @@ def __init__(self, mu=0, sd=None, tau=None, **kwargs): self.tau = tau = tt.as_tensor_variable(tau) self.median = invlogit(mu) - assert_negative_support(sd, 'sd', 'LogitNormal') - assert_negative_support(tau, 'tau', 'LogitNormal') + assert_negative_support(sd, "sd", "LogitNormal") + assert_negative_support(tau, "tau", "LogitNormal") super(LogitNormal, self).__init__(**kwargs) @@ -3775,10 +3902,12 @@ def random(self, point=None, size=None): ------- array """ - mu, _, sd = draw_values( - [self.mu, self.tau, self.sd], point=point, size=size) - return expit(generate_samples(stats.norm.rvs, loc=mu, scale=sd, dist_shape=self.shape, - size=size)) + mu, _, sd = draw_values([self.mu, self.tau, self.sd], point=point, size=size) + return expit( + generate_samples( + stats.norm.rvs, loc=mu, scale=sd, dist_shape=self.shape, size=size + ) + ) def logp(self, value): """ @@ -3797,23 +3926,28 @@ def logp(self, value): sd = self.sd mu = self.mu tau = self.tau - return bound(-0.5 * tau * (logit(value) - mu) ** 2 - + 0.5 * tt.log(tau / (2. * np.pi)) - - tt.log(value * (1 - value)), value > 0, value < 1, tau > 0) + return bound( + -0.5 * tau * (logit(value) - mu) ** 2 + + 0.5 * tt.log(tau / (2.0 * np.pi)) + - tt.log(value * (1 - value)), + value > 0, + value < 1, + tau > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self sd = dist.sd mu = dist.mu - name = r'\text{%s}' % name - return r'${} \sim \text{{LogitNormal}}(\mathit{{mu}}={},~\mathit{{sd}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(sd)) + name = r"\text{%s}" % name + return r"${} \sim \text{{LogitNormal}}(\mathit{{mu}}={},~\mathit{{sd}}={})$".format( + name, get_variable_name(mu), get_variable_name(sd) + ) class Interpolated(BoundedContinuous): - R""" + r""" Univariate probability distribution defined as a linear interpolation of probability density function evaluated on some lattice of points. @@ -3844,11 +3978,9 @@ def __init__(self, x_points, pdf_points, *args, **kwargs): self.lower = lower = tt.as_tensor_variable(x_points[0]) self.upper = upper = tt.as_tensor_variable(x_points[-1]) - super(Interpolated, self).__init__(lower=lower, upper=upper, - *args, **kwargs) + super(Interpolated, self).__init__(lower=lower, upper=upper, *args, **kwargs) - interp = InterpolatedUnivariateSpline( - x_points, pdf_points, k=1, ext='zeros') + interp = InterpolatedUnivariateSpline(x_points, pdf_points, k=1, ext="zeros") Z = interp.integral(x_points[0], x_points[-1]) self.Z = tt.as_tensor_variable(Z) @@ -3872,9 +4004,10 @@ def _argcdf(self, p): np.where( np.abs(pdf[index]) <= 1e-8, np.zeros(index.shape), - (p - cdf[index]) / pdf[index] + (p - cdf[index]) / pdf[index], ), - (-pdf[index] + np.sqrt(pdf[index] ** 2 + 2 * slope * (p - cdf[index]))) / slope + (-pdf[index] + np.sqrt(pdf[index] ** 2 + 2 * slope * (p - cdf[index]))) + / slope, ) def _random(self, size=None): @@ -3894,9 +4027,7 @@ def random(self, size=None): ------- array """ - return generate_samples(self._random, - dist_shape=self.shape, - size=size) + return generate_samples(self._random, dist_shape=self.shape, size=size) def logp(self, value): """ diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 86db441ae1..67da9f0694 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -10,14 +10,27 @@ from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp -__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull', - 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', - 'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial', - 'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic'] +__all__ = [ + "Binomial", + "BetaBinomial", + "Bernoulli", + "DiscreteWeibull", + "Poisson", + "NegativeBinomial", + "ConstantDist", + "Constant", + "ZeroInflatedPoisson", + "ZeroInflatedBinomial", + "ZeroInflatedNegativeBinomial", + "DiscreteUniform", + "Geometric", + "Categorical", + "OrderedLogistic", +] class Binomial(Discrete): - R""" + r""" Binomial log-likelihood. The discrete probability distribution of the number of successes @@ -66,9 +79,9 @@ def __init__(self, n, p, *args, **kwargs): def random(self, point=None, size=None): n, p = draw_values([self.n, self.p], point=point, size=size) - return generate_samples(stats.binom.rvs, n=n, p=p, - dist_shape=self.shape, - size=size) + return generate_samples( + stats.binom.rvs, n=n, p=p, dist_shape=self.shape, size=size + ) def logp(self, value): n = self.n @@ -76,21 +89,25 @@ def logp(self, value): return bound( binomln(n, value) + logpow(p, value) + logpow(1 - p, n - value), - 0 <= value, value <= n, - 0 <= p, p <= 1) + 0 <= value, + value <= n, + 0 <= p, + p <= 1, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self n = dist.n p = dist.p - name = r'\text{%s}' % name - return r'${} \sim \text{{Binomial}}(\mathit{{n}}={},~\mathit{{p}}={})$'.format(name, - get_variable_name(n), - get_variable_name(p)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Binomial}}(\mathit{{n}}={},~\mathit{{p}}={})$".format( + name, get_variable_name(n), get_variable_name(p) + ) + class BetaBinomial(Discrete): - R""" + r""" Beta-binomial log-likelihood. Equivalent to binomial random variable with success probability @@ -149,7 +166,7 @@ def __init__(self, alpha, beta, n, *args, **kwargs): self.alpha = alpha = tt.as_tensor_variable(alpha) self.beta = beta = tt.as_tensor_variable(beta) self.n = n = tt.as_tensor_variable(n) - self.mode = tt.cast(tround(alpha / (alpha + beta)), 'int8') + self.mode = tt.cast(tround(alpha / (alpha + beta)), "int8") def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -163,42 +180,50 @@ def _random(self, alpha, beta, n, size=None): quotient, remainder = divmod(_p.shape[0], _n.shape[0]) if remainder != 0: - raise TypeError('n has a bad size! Was cast to {}, must evenly divide {}'.format( - _n.shape[0], _p.shape[0])) + raise TypeError( + "n has a bad size! Was cast to {}, must evenly divide {}".format( + _n.shape[0], _p.shape[0] + ) + ) if quotient != 1: _n = np.tile(_n, quotient) samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size) return samples def random(self, point=None, size=None): - alpha, beta, n = \ - draw_values([self.alpha, self.beta, self.n], point=point, size=size) - return generate_samples(self._random, alpha=alpha, beta=beta, n=n, - dist_shape=self.shape, - size=size) + alpha, beta, n = draw_values( + [self.alpha, self.beta, self.n], point=point, size=size + ) + return generate_samples( + self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size + ) def logp(self, value): alpha = self.alpha beta = self.beta - return bound(binomln(self.n, value) - + betaln(value + alpha, self.n - value + beta) - - betaln(alpha, beta), - value >= 0, value <= self.n, - alpha > 0, beta > 0) + return bound( + binomln(self.n, value) + + betaln(value + alpha, self.n - value + beta) + - betaln(alpha, beta), + value >= 0, + value <= self.n, + alpha > 0, + beta > 0, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self alpha = dist.alpha beta = dist.beta - name = r'\text{%s}' % name - return r'${} \sim \text{{BetaBinomial}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name, - get_variable_name(alpha), - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{BetaBinomial}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format( + name, get_variable_name(alpha), get_variable_name(beta) + ) class Bernoulli(Discrete): - R"""Bernoulli log-likelihood + r"""Bernoulli log-likelihood The Bernoulli distribution describes the probability of successes (x=1) and failures (x=0). @@ -240,7 +265,7 @@ class Bernoulli(Discrete): def __init__(self, p=None, logit_p=None, *args, **kwargs): super(Bernoulli, self).__init__(*args, **kwargs) if sum(int(var is None) for var in [p, logit_p]) != 1: - raise ValueError('Specify one of p and logit_p') + raise ValueError("Specify one of p and logit_p") if p is not None: self._is_logit = False self.p = p = tt.as_tensor_variable(p) @@ -250,13 +275,13 @@ def __init__(self, p=None, logit_p=None, *args, **kwargs): self.p = tt.nnet.sigmoid(logit_p) self._logit_p = tt.as_tensor_variable(logit_p) - self.mode = tt.cast(tround(self.p), 'int8') + self.mode = tt.cast(tround(self.p), "int8") def random(self, point=None, size=None): p = draw_values([self.p], point=point, size=size)[0] - return generate_samples(stats.bernoulli.rvs, p, - dist_shape=self.shape, - size=size) + return generate_samples( + stats.bernoulli.rvs, p, dist_shape=self.shape, size=size + ) def logp(self, value): if self._is_logit: @@ -266,20 +291,24 @@ def logp(self, value): p = self.p return bound( tt.switch(value, tt.log(p), tt.log(1 - p)), - value >= 0, value <= 1, - p >= 0, p <= 1) + value >= 0, + value <= 1, + p >= 0, + p <= 1, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self p = dist.p - name = r'\text{%s}' % name - return r'${} \sim \text{{Bernoulli}}(\mathit{{p}}={})$'.format(name, - get_variable_name(p)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Bernoulli}}(\mathit{{p}}={})$".format( + name, get_variable_name(p) + ) class DiscreteWeibull(Discrete): - R"""Discrete Weibull log-likelihood + r"""Discrete Weibull log-likelihood The discrete Weibull distribution is a flexible model of count data that can handle both over- and under-dispersion. @@ -316,8 +345,9 @@ def DiscreteWeibull(q, b, x): Variance :math:`2 \sum_{x = 1}^{\infty} x q^{x^{\beta}} - \mu - \mu^2` ======== ====================== """ + def __init__(self, q, beta, *args, **kwargs): - super(DiscreteWeibull, self).__init__(*args, defaults=('median',), **kwargs) + super(DiscreteWeibull, self).__init__(*args, defaults=("median",), **kwargs) self.q = q = tt.as_tensor_variable(q) self.beta = beta = tt.as_tensor_variable(beta) @@ -328,10 +358,16 @@ def logp(self, value): q = self.q beta = self.beta - return bound(tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))), - 0 <= value, - 0 < q, q < 1, - 0 < beta) + return bound( + tt.log( + tt.power(q, tt.power(value, beta)) + - tt.power(q, tt.power(value + 1, beta)) + ), + 0 <= value, + 0 < q, + q < 1, + 0 < beta, + ) def _ppf(self, p): """ @@ -341,33 +377,33 @@ def _ppf(self, p): q = self.q beta = self.beta - return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1. / beta)) - 1).astype('int64') + return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1.0 / beta)) - 1).astype( + "int64" + ) def _random(self, q, beta, size=None): p = np.random.uniform(size=size) - return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1 + return np.ceil(np.power(np.log(1 - p) / np.log(q), 1.0 / beta)) - 1 def random(self, point=None, size=None): q, beta = draw_values([self.q, self.beta], point=point, size=size) - return generate_samples(self._random, q, beta, - dist_shape=self.shape, - size=size) + return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self q = dist.q beta = dist.beta - name = r'\text{%s}' % name - return r'${} \sim \text{{DiscreteWeibull}}(\mathit{{q}}={},~\mathit{{beta}}={})$'.format(name, - get_variable_name(q), - get_variable_name(beta)) + name = r"\text{%s}" % name + return r"${} \sim \text{{DiscreteWeibull}}(\mathit{{q}}={},~\mathit{{beta}}={})$".format( + name, get_variable_name(q), get_variable_name(beta) + ) class Poisson(Discrete): - R""" + r""" Poisson log-likelihood. Often used to model the number of events occurring in a fixed period @@ -413,34 +449,30 @@ class Poisson(Discrete): def __init__(self, mu, *args, **kwargs): super(Poisson, self).__init__(*args, **kwargs) self.mu = mu = tt.as_tensor_variable(mu) - self.mode = tt.floor(mu).astype('int32') + self.mode = tt.floor(mu).astype("int32") def random(self, point=None, size=None): mu = draw_values([self.mu], point=point, size=size)[0] - return generate_samples(stats.poisson.rvs, mu, - dist_shape=self.shape, - size=size) + return generate_samples(stats.poisson.rvs, mu, dist_shape=self.shape, size=size) def logp(self, value): mu = self.mu - log_prob = bound( - logpow(mu, value) - factln(value) - mu, - mu >= 0, value >= 0) + log_prob = bound(logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0) # Return zero when mu and value are both zero - return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), - 0, log_prob) + return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self mu = dist.mu - name = r'\text{%s}' % name - return r'${} \sim \text{{Poisson}}(\mathit{{mu}}={})$'.format(name, - get_variable_name(mu)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Poisson}}(\mathit{{mu}}={})$".format( + name, get_variable_name(mu) + ) class NegativeBinomial(Discrete): - R""" + r""" Negative binomial log-likelihood. The negative binomial distribution describes a Poisson random variable @@ -493,42 +525,46 @@ def __init__(self, mu, alpha, *args, **kwargs): super(NegativeBinomial, self).__init__(*args, **kwargs) self.mu = mu = tt.as_tensor_variable(mu) self.alpha = alpha = tt.as_tensor_variable(alpha) - self.mode = tt.floor(mu).astype('int32') + self.mode = tt.floor(mu).astype("int32") def random(self, point=None, size=None): mu, alpha = draw_values([self.mu, self.alpha], point=point, size=size) - g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha, - dist_shape=self.shape, - size=size) + g = generate_samples( + stats.gamma.rvs, alpha, scale=mu / alpha, dist_shape=self.shape, size=size + ) g[g == 0] = np.finfo(float).eps # Just in case return np.asarray(stats.poisson.rvs(g)).reshape(g.shape) def logp(self, value): mu = self.mu alpha = self.alpha - negbinom = bound(binomln(value + alpha - 1, value) - + logpow(mu / (mu + alpha), value) - + logpow(alpha / (mu + alpha), alpha), - value >= 0, mu > 0, alpha > 0) + negbinom = bound( + binomln(value + alpha - 1, value) + + logpow(mu / (mu + alpha), value) + + logpow(alpha / (mu + alpha), alpha), + value >= 0, + mu > 0, + alpha > 0, + ) # Return Poisson when alpha gets very large. - return tt.switch(tt.gt(alpha, 1e10), - Poisson.dist(self.mu).logp(value), - negbinom) + return tt.switch( + tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self mu = dist.mu alpha = dist.alpha - name = r'\text{%s}' % name - return r'${} \sim \text{{NegativeBinomial}}(\mathit{{mu}}={},~\mathit{{alpha}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(alpha)) + name = r"\text{%s}" % name + return r"${} \sim \text{{NegativeBinomial}}(\mathit{{mu}}={},~\mathit{{alpha}}={})$".format( + name, get_variable_name(mu), get_variable_name(alpha) + ) class Geometric(Discrete): - R""" + r""" Geometric log-likelihood. The probability that the first success in a sequence of Bernoulli @@ -571,26 +607,26 @@ def __init__(self, p, *args, **kwargs): def random(self, point=None, size=None): p = draw_values([self.p], point=point, size=size)[0] - return generate_samples(np.random.geometric, p, - dist_shape=self.shape, - size=size) + return generate_samples( + np.random.geometric, p, dist_shape=self.shape, size=size + ) def logp(self, value): p = self.p - return bound(tt.log(p) + logpow(1 - p, value - 1), - 0 <= p, p <= 1, value >= 1) + return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self p = dist.p - name = r'\text{%s}' % name - return r'${} \sim \text{{Geometric}}(\mathit{{p}}={})$'.format(name, - get_variable_name(p)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Geometric}}(\mathit{{p}}={})$".format( + name, get_variable_name(p) + ) class DiscreteUniform(Discrete): - R""" + r""" Discrete uniform distribution. The pmf of this distribution is @@ -630,10 +666,11 @@ class DiscreteUniform(Discrete): def __init__(self, lower, upper, *args, **kwargs): super(DiscreteUniform, self).__init__(*args, **kwargs) - self.lower = tt.floor(lower).astype('int32') - self.upper = tt.floor(upper).astype('int32') + self.lower = tt.floor(lower).astype("int32") + self.upper = tt.floor(upper).astype("int32") self.mode = tt.maximum( - tt.floor((upper + lower) / 2.).astype('int32'), self.lower) + tt.floor((upper + lower) / 2.0).astype("int32"), self.lower + ) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -643,30 +680,28 @@ def _random(self, lower, upper, size=None): def random(self, point=None, size=None): lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - return generate_samples(self._random, - lower, upper, - dist_shape=self.shape, - size=size) + return generate_samples( + self._random, lower, upper, dist_shape=self.shape, size=size + ) def logp(self, value): upper = self.upper lower = self.lower - return bound(-tt.log(upper - lower + 1), - lower <= value, value <= upper) + return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self lower = dist.lower upper = dist.upper - name = r'\text{%s}' % name - return r'${} \sim \text{{DiscreteUniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$'.format(name, - get_variable_name(lower), - get_variable_name(upper)) + name = r"\text{%s}" % name + return r"${} \sim \text{{DiscreteUniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$".format( + name, get_variable_name(lower), get_variable_name(upper) + ) class Categorical(Discrete): - R""" + r""" Categorical log-likelihood. The most general discrete distribution. The pmf of this distribution is @@ -713,11 +748,13 @@ def __init__(self, p, *args, **kwargs): def random(self, point=None, size=None): p, k = draw_values([self.p, self.k], point=point, size=size) - return generate_samples(random_choice, - p=p, - broadcast_shape=p.shape[:-1] or (1,), - dist_shape=self.shape, - size=size) + return generate_samples( + random_choice, + p=p, + broadcast_shape=p.shape[:-1] or (1,), + dist_shape=self.shape, + size=size, + ) def logp(self, value): p = self.p @@ -726,8 +763,7 @@ def logp(self, value): # Clip values before using them for indexing value_clip = tt.clip(value, 0, k - 1) - sumto1 = theano.gradient.zero_grad( - tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5)) + sumto1 = theano.gradient.zero_grad(tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5)) if p.ndim > 1: a = tt.log(p[tt.arange(p.shape[0]), value_clip]) @@ -740,9 +776,10 @@ def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self p = dist.p - name = r'\text{%s}' % name - return r'${} \sim \text{{Categorical}}(\mathit{{p}}={})$'.format(name, - get_variable_name(p)) + name = r"\text{%s}" % name + return r"${} \sim \text{{Categorical}}(\mathit{{p}}={})$".format( + name, get_variable_name(p) + ) class Constant(Discrete): @@ -756,8 +793,10 @@ class Constant(Discrete): """ def __init__(self, c, *args, **kwargs): - warnings.warn("Constant has been deprecated. We recommend using a Deterministic object instead.", - DeprecationWarning) + warnings.warn( + "Constant has been deprecated. We recommend using a Deterministic object instead.", + DeprecationWarning, + ) super(Constant, self).__init__(*args, **kwargs) self.mean = self.median = self.mode = self.c = c = tt.as_tensor_variable(c) @@ -768,8 +807,9 @@ def random(self, point=None, size=None): def _random(c, dtype=dtype, size=None): return np.full(size, fill_value=c, dtype=dtype) - return generate_samples(_random, c=c, dist_shape=self.shape, - size=size).astype(dtype) + return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype( + dtype + ) def logp(self, value): c = self.c @@ -778,15 +818,15 @@ def logp(self, value): def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self - name = r'\text{%s}' % name - return r'${} \sim \text{{Constant}}()$'.format(name) + name = r"\text{%s}" % name + return r"${} \sim \text{{Constant}}()$".format(name) ConstantDist = Constant class ZeroInflatedPoisson(Discrete): - R""" + r""" Zero-inflated Poisson log-likelihood. Often used to model the number of events occurring in a fixed period @@ -844,9 +884,7 @@ def __init__(self, psi, theta, *args, **kwargs): def random(self, point=None, size=None): theta, psi = draw_values([self.theta, self.psi], point=point, size=size) - g = generate_samples(stats.poisson.rvs, theta, - dist_shape=self.shape, - size=size) + g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size) return g * (np.random.random(np.squeeze(g.shape)) < psi) def logp(self, value): @@ -856,27 +894,24 @@ def logp(self, value): logp_val = tt.switch( tt.gt(value, 0), tt.log(psi) + self.pois.logp(value), - logaddexp(tt.log1p(-psi), tt.log(psi) - theta)) + logaddexp(tt.log1p(-psi), tt.log(psi) - theta), + ) - return bound( - logp_val, - 0 <= value, - 0 <= psi, psi <= 1, - 0 <= theta) + return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self theta = dist.theta psi = dist.psi - name = r'\text{%s}' % name - return r'${} \sim \text{{ZeroInflatedPoisson}}(\mathit{{theta}}={},~\mathit{{psi}}={})$'.format(name, - get_variable_name(theta), - get_variable_name(psi)) + name = r"\text{%s}" % name + return r"${} \sim \text{{ZeroInflatedPoisson}}(\mathit{{theta}}={},~\mathit{{psi}}={})$".format( + name, get_variable_name(theta), get_variable_name(psi) + ) class ZeroInflatedBinomial(Discrete): - R""" + r""" Zero-inflated Binomial log-likelihood. The pmf of this distribution is @@ -936,9 +971,7 @@ def __init__(self, psi, n, p, *args, **kwargs): def random(self, point=None, size=None): n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size) - g = generate_samples(stats.binom.rvs, n, p, - dist_shape=self.shape, - size=size) + g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size) return g * (np.random.random(np.squeeze(g.shape)) < psi) def logp(self, value): @@ -949,13 +982,12 @@ def logp(self, value): logp_val = tt.switch( tt.gt(value, 0), tt.log(psi) + self.bin.logp(value), - logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p))) + logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)), + ) return bound( - logp_val, - 0 <= value, value <= n, - 0 <= psi, psi <= 1, - 0 <= p, p <= 1) + logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1 + ) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -967,15 +999,16 @@ def _repr_latex_(self, name=None, dist=None): name_n = get_variable_name(n) name_p = get_variable_name(p) name_psi = get_variable_name(psi) - name = r'\text{%s}' % name - return (r'${} \sim \text{{ZeroInflatedBinomial}}' - r'(\mathit{{n}}={},~\mathit{{p}}={},~' - r'\mathit{{psi}}={})$' - .format(name, name_n, name_p, name_psi)) + name = r"\text{%s}" % name + return ( + r"${} \sim \text{{ZeroInflatedBinomial}}" + r"(\mathit{{n}}={},~\mathit{{p}}={},~" + r"\mathit{{psi}}={})$".format(name, name_n, name_p, name_psi) + ) class ZeroInflatedNegativeBinomial(Discrete): - R""" + r""" Zero-Inflated Negative binomial log-likelihood. The Zero-inflated version of the Negative Binomial (NB). @@ -1052,10 +1085,11 @@ def __init__(self, psi, mu, alpha, *args, **kwargs): def random(self, point=None, size=None): mu, alpha, psi = draw_values( - [self.mu, self.alpha, self.psi], point=point, size=size) - g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha, - dist_shape=self.shape, - size=size) + [self.mu, self.alpha, self.psi], point=point, size=size + ) + g = generate_samples( + stats.gamma.rvs, alpha, scale=mu / alpha, dist_shape=self.shape, size=size + ) g[g == 0] = np.finfo(float).eps # Just in case return stats.poisson.rvs(g) * (np.random.random(np.squeeze(g.shape)) < psi) @@ -1066,19 +1100,12 @@ def logp(self, value): logp_other = tt.log(psi) + self.nb.logp(value) logp_0 = logaddexp( - tt.log1p(-psi), - tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))) + tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu)) + ) - logp_val = tt.switch( - tt.gt(value, 0), - logp_other, - logp_0) + logp_val = tt.switch(tt.gt(value, 0), logp_other, logp_0) - return bound( - logp_val, - 0 <= value, - 0 <= psi, psi <= 1, - mu > 0, alpha > 0) + return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -1090,15 +1117,16 @@ def _repr_latex_(self, name=None, dist=None): name_mu = get_variable_name(mu) name_alpha = get_variable_name(alpha) name_psi = get_variable_name(psi) - name = r'\text{%s}' % name - return (r'${} \sim \text{{ZeroInflatedNegativeBinomial}}' - r'(\mathit{{mu}}={},~\mathit{{alpha}}={},~' - r'\mathit{{psi}}={})$' - .format(name, name_mu, name_alpha, name_psi)) + name = r"\text{%s}" % name + return ( + r"${} \sim \text{{ZeroInflatedNegativeBinomial}}" + r"(\mathit{{mu}}={},~\mathit{{alpha}}={},~" + r"\mathit{{psi}}={})$".format(name, name_mu, name_alpha, name_psi) + ) class OrderedLogistic(Categorical): - R""" + r""" Ordered Logistic log-likelihood. Useful for regression on ordinal data values whose values range @@ -1167,11 +1195,14 @@ def __init__(self, eta, cutpoints, *args, **kwargs): self.cutpoints = tt.as_tensor_variable(cutpoints) pa = sigmoid(tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta)) - p_cum = tt.concatenate([ - tt.zeros_like(tt.shape_padright(pa[:, 0])), - pa, - tt.ones_like(tt.shape_padright(pa[:, 0])) - ], axis=1) + p_cum = tt.concatenate( + [ + tt.zeros_like(tt.shape_padright(pa[:, 0])), + pa, + tt.ones_like(tt.shape_padright(pa[:, 0])), + ], + axis=1, + ) p = p_cum[:, 1:] - p_cum[:, :-1] super(OrderedLogistic, self).__init__(p=p, *args, **kwargs) @@ -1181,6 +1212,9 @@ def _repr_latex_(self, name=None, dist=None): dist = self name_eta = get_variable_name(dist.eta) name_cutpoints = get_variable_name(dist.cutpoints) - return (r'${} \sim \text{{OrderedLogistic}}' - r'(\mathit{{eta}}={}, \mathit{{cutpoints}}={}$' - .format(name, name_eta, name_cutpoints)) + return ( + r"${} \sim \text{{OrderedLogistic}}" + r"(\mathit{{eta}}={}, \mathit{{cutpoints}}={}$".format( + name, name_eta, name_cutpoints + ) + ) diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 7ea1b6b28d..d12fb766c2 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -1,8 +1,8 @@ -''' +""" Created on Mar 7, 2011 @author: johnsalvatier -''' +""" from __future__ import division import numpy as np @@ -19,7 +19,7 @@ f = floatX -c = - .5 * np.log(2. * np.pi) +c = -0.5 * np.log(2.0 * np.pi) def bound(logp, *conditions, **kwargs): @@ -41,7 +41,7 @@ def bound(logp, *conditions, **kwargs): ------- logp with elements set to -inf where any condition is False """ - broadcast_conditions = kwargs.get('broadcast_conditions', True) + broadcast_conditions = kwargs.get("broadcast_conditions", True) if broadcast_conditions: alltrue = alltrue_elemwise @@ -86,7 +86,7 @@ def std_cdf(x): """ Calculates the standard normal cumulative distribution function. """ - return .5 + .5 * tt.erf(x / tt.sqrt(2.)) + return 0.5 + 0.5 * tt.erf(x / tt.sqrt(2.0)) def normal_lcdf(mu, sigma, x): @@ -94,8 +94,8 @@ def normal_lcdf(mu, sigma, x): z = (x - mu) / sigma return tt.switch( tt.lt(z, -1.0), - tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2., - tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.) + tt.log(tt.erfcx(-z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2.0, + tt.log1p(-tt.erfc(z / tt.sqrt(2.0)) / 2.0), ) @@ -103,8 +103,8 @@ def normal_lccdf(mu, sigma, x): z = (x - mu) / sigma return tt.switch( tt.gt(z, 1.0), - tt.log(tt.erfcx(z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2., - tt.log1p(-tt.erfc(-z / tt.sqrt(2.)) / 2.) + tt.log(tt.erfcx(z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2.0, + tt.log1p(-tt.erfc(-z / tt.sqrt(2.0)) / 2.0), ) @@ -112,7 +112,7 @@ def sd2rho(sd): """ `sd -> rho` theano converter :math:`mu + sd*e = mu + log(1+exp(rho))*e`""" - return tt.log(tt.exp(tt.abs_(sd)) - 1.) + return tt.log(tt.exp(tt.abs_(sd)) - 1.0) def rho2sd(rho): @@ -145,16 +145,16 @@ def log_normal(x, mean, **kwargs): 4) `tau` that follows this equation :math:`tau = std^{-1}` ---- """ - sd = kwargs.get('sd') - w = kwargs.get('w') - rho = kwargs.get('rho') - tau = kwargs.get('tau') - eps = kwargs.get('eps', 0.) + sd = kwargs.get("sd") + w = kwargs.get("w") + rho = kwargs.get("rho") + tau = kwargs.get("tau") + eps = kwargs.get("eps", 0.0) check = sum(map(lambda a: a is not None, [sd, w, rho, tau])) if check > 1: - raise ValueError('more than one required kwarg is passed') + raise ValueError("more than one required kwarg is passed") if check == 0: - raise ValueError('none of required kwarg is passed') + raise ValueError("none of required kwarg is passed") if sd is not None: std = sd elif w is not None: @@ -162,9 +162,9 @@ def log_normal(x, mean, **kwargs): elif rho is not None: std = rho2sd(rho) else: - std = tau**(-1) + std = tau ** (-1) std += f(eps) - return f(c) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2. * std ** 2) + return f(c) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2.0 * std ** 2) def MvNormalLogp(): @@ -179,14 +179,14 @@ def MvNormalLogp(): delta : tt.matrix Array of deviations from the mean. """ - cov = tt.matrix('cov') + cov = tt.matrix("cov") cov.tag.test_value = floatX(np.eye(3)) - delta = tt.matrix('delta') + delta = tt.matrix("delta") delta.tag.test_value = floatX(np.zeros((2, 3))) - solve_lower = tt.slinalg.Solve(A_structure='lower_triangular') - solve_upper = tt.slinalg.Solve(A_structure='upper_triangular') - cholesky = Cholesky(lower=True, on_error='nan') + solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") + solve_upper = tt.slinalg.Solve(A_structure="upper_triangular") + cholesky = Cholesky(lower=True, on_error="nan") n, k = delta.shape n, k = f(n), f(k) @@ -200,14 +200,14 @@ def MvNormalLogp(): result = n * k * tt.log(f(2) * np.pi) result += f(2) * n * tt.sum(tt.log(diag)) result += (delta_trans ** f(2)).sum() - result = f(-.5) * result + result = f(-0.5) * result logp = tt.switch(ok, result, -np.inf) def dlogp(inputs, gradients): g_logp, = gradients cov, delta = inputs - g_logp.tag.test_value = floatX(1.) + g_logp.tag.test_value = floatX(1.0) n, k = delta.shape chol_cov = cholesky(cov) @@ -229,8 +229,7 @@ def dlogp(inputs, gradients): return [-0.5 * g_cov * g_logp, -g_delta * g_logp] - return theano.OpFromGraph( - [cov, delta], [logp], grad_overrides=dlogp, inline=True) + return theano.OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True) class SplineWrapper(theano.Op): @@ -238,7 +237,7 @@ class SplineWrapper(theano.Op): Creates a theano operation from scipy.interpolate.UnivariateSpline """ - __props__ = ('spline',) + __props__ = ("spline",) def __init__(self, spline): self.spline = spline @@ -249,14 +248,14 @@ def make_node(self, x): @property def grad_op(self): - if not hasattr(self, '_grad_op'): + if not hasattr(self, "_grad_op"): try: self._grad_op = SplineWrapper(self.spline.derivative()) except ValueError: self._grad_op = None if self._grad_op is None: - raise NotImplementedError('Spline of order 0 is not differentiable') + raise NotImplementedError("Spline of order 0 is not differentiable") return self._grad_op def perform(self, node, inputs, output_storage): @@ -270,18 +269,18 @@ def grad(self, inputs, grads): return [x_grad * self.grad_op(x)] - class I0e(UnaryScalarOp): """ Modified Bessel function of the first kind of order 0, exponentially scaled. """ - nfunc_spec = ('scipy.special.i0e', 1, 1) + + nfunc_spec = ("scipy.special.i0e", 1, 1) def impl(self, x): return scipy.special.i0e(x) -i0e = I0e(upgrade_to_float, name='i0e') +i0e = I0e(upgrade_to_float, name="i0e") def random_choice(*args, **kwargs): @@ -299,8 +298,8 @@ def random_choice(*args, **kwargs): random sample: array """ - p = kwargs.pop('p') - size = kwargs.pop('size') + p = kwargs.pop("p") + size = kwargs.pop("size") k = p.shape[-1] if p.ndim > 1: @@ -319,17 +318,17 @@ def zvalue(value, sd, mu): def incomplete_beta_cfe(a, b, x, small): - '''Incomplete beta continued fraction expansions + """Incomplete beta continued fraction expansions based on Cephes library by Steve Moshier (incbet.c). small: Choose element-wise which continued fraction expansion to use. - ''' - BIG = tt.constant(4.503599627370496e15, dtype='float64') - BIGINV = tt.constant(2.22044604925031308085e-16, dtype='float64') - THRESH = tt.constant(3. * np.MachAr().eps, dtype='float64') + """ + BIG = tt.constant(4.503599627370496e15, dtype="float64") + BIGINV = tt.constant(2.22044604925031308085e-16, dtype="float64") + THRESH = tt.constant(3.0 * np.MachAr().eps, dtype="float64") - zero = tt.constant(0., dtype='float64') - one = tt.constant(1., dtype='float64') - two = tt.constant(2., dtype='float64') + zero = tt.constant(0.0, dtype="float64") + one = tt.constant(1.0, dtype="float64") + two = tt.constant(2.0, dtype="float64") r = one k1 = a @@ -350,11 +349,7 @@ def incomplete_beta_cfe(a, b, x, small): qkm1 = one r = one - def _step( - i, - pkm1, pkm2, qkm1, qkm2, - k1, k2, k3, k4, k5, k6, k7, k8, r - ): + def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r): xk = -(x * k1 * k2) / (k3 * k4) pk = pkm1 + pkm2 * xk qk = qkm1 + qkm2 * xk @@ -372,7 +367,7 @@ def _step( qkm1 = qk old_r = r - r = tt.switch(tt.eq(qk, zero), r, pk/qk) + r = tt.switch(tt.eq(qk, zero), r, pk / qk) k1 += one k2 += k26update @@ -384,10 +379,7 @@ def _step( k8 += two big_cond = tt.gt(tt.abs_(qk) + tt.abs_(pk), BIG) - biginv_cond = tt.or_( - tt.lt(tt.abs_(qk), BIGINV), - tt.lt(tt.abs_(pk), BIGINV) - ) + biginv_cond = tt.or_(tt.lt(tt.abs_(qk), BIGINV), tt.lt(tt.abs_(pk), BIGINV)) pkm2 = tt.switch(big_cond, pkm2 * BIGINV, pkm2) pkm1 = tt.switch(big_cond, pkm1 * BIGINV, pkm1) @@ -399,37 +391,37 @@ def _step( qkm2 = tt.switch(biginv_cond, qkm2 * BIG, qkm2) qkm1 = tt.switch(biginv_cond, qkm1 * BIG, qkm1) - return ((pkm1, pkm2, qkm1, qkm2, - k1, k2, k3, k4, k5, k6, k7, k8, r), - until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r)))) + return ( + (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), + until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r))), + ) - (pkm1, pkm2, qkm1, qkm2, - k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan( + (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan( _step, sequences=[tt.arange(0, 300)], outputs_info=[ - e for e in - tt.cast((pkm1, pkm2, qkm1, qkm2, - k1, k2, k3, k4, k5, k6, k7, k8, r), - 'float64') - ] + e + for e in tt.cast( + (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64" + ) + ], ) return r[-1] def incomplete_beta_ps(a, b, value): - '''Power series for incomplete beta + """Power series for incomplete beta Use when b*x is small and value not too close to 1. Based on Cephes library by Steve Moshier (incbet.c) - ''' - one = tt.constant(1, dtype='float64') + """ + one = tt.constant(1, dtype="float64") ai = one / a u = (one - b) * value t1 = u / (a + one) t = u threshold = np.MachAr().eps * ai - s = tt.constant(0, dtype='float64') + s = tt.constant(0, dtype="float64") def _step(i, t, s): t *= (i - b) * value / i @@ -440,30 +432,22 @@ def _step(i, t, s): (t, s), _ = scan( _step, sequences=[tt.arange(2, 302)], - outputs_info=[ - e for e in - tt.cast((t, s), - 'float64') - ] + outputs_info=[e for e in tt.cast((t, s), "float64")], ) s = s[-1] + t1 + ai - t = ( - gammaln(a + b) - gammaln(a) - gammaln(b) + - a * tt.log(value) + - tt.log(s) - ) + t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * tt.log(value) + tt.log(s) return tt.exp(t) def incomplete_beta(a, b, value): - '''Incomplete beta implementation + """Incomplete beta implementation Power series and continued fraction expansions chosen for best numerical convergence across the board based on inputs. - ''' - machep = tt.constant(np.MachAr().eps, dtype='float64') - one = tt.constant(1, dtype='float64') + """ + machep = tt.constant(np.MachAr().eps, dtype="float64") + one = tt.constant(1, dtype="float64") w = one - value ps = incomplete_beta_ps(a, b, value) @@ -485,20 +469,17 @@ def incomplete_beta(a, b, value): # Direct incomplete beta accounting for flipped a, b. t = tt.exp( - a * tt.log(x) + b * tt.log(xc) + - gammaln(a + b) - gammaln(a) - gammaln(b) + - tt.log(w / a) + a * tt.log(x) + + b * tt.log(xc) + + gammaln(a + b) + - gammaln(a) + - gammaln(b) + + tt.log(w / a) ) - t = tt.switch( - flip, - tt.switch(tt.le(t, machep), one - machep, one - t), - t - ) + t = tt.switch(flip, tt.switch(tt.le(t, machep), one - machep, one - t), t) return tt.switch( tt.and_(flip, tt.and_(tt.le((b * x), one), tt.le(x, 0.95))), tps, - tt.switch( - tt.and_(tt.le(b * value, one), tt.le(value, 0.95)), - ps, - t)) + tt.switch(tt.and_(tt.le(b * value, one), tt.le(value, 0.95)), ps, t), + ) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 5783609765..28c97492af 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -7,13 +7,24 @@ import theano from ..memoize import memoize from ..model import ( - Model, get_named_nodes_and_relations, FreeRV, - ObservedRV, MultiObservedRV + Model, + get_named_nodes_and_relations, + FreeRV, + ObservedRV, + MultiObservedRV, ) from ..vartypes import string_types -__all__ = ['DensityDist', 'Distribution', 'Continuous', 'Discrete', - 'NoDistribution', 'TensorType', 'draw_values', 'generate_samples'] +__all__ = [ + "DensityDist", + "Distribution", + "Continuous", + "Discrete", + "NoDistribution", + "TensorType", + "draw_values", + "generate_samples", +] class _Unpickling(object): @@ -22,29 +33,34 @@ class _Unpickling(object): class Distribution(object): """Statistical distribution""" + def __new__(cls, name, *args, **kwargs): if name is _Unpickling: return object.__new__(cls) # for pickle try: model = Model.get_context() except TypeError: - raise TypeError("No model on context stack, which is needed to " - "instantiate distributions. Add variable inside " - "a 'with model:' block, or use the '.dist' syntax " - "for a standalone distribution.") + raise TypeError( + "No model on context stack, which is needed to " + "instantiate distributions. Add variable inside " + "a 'with model:' block, or use the '.dist' syntax " + "for a standalone distribution." + ) if isinstance(name, string_types): - data = kwargs.pop('observed', None) + data = kwargs.pop("observed", None) if isinstance(data, ObservedRV) or isinstance(data, FreeRV): - raise TypeError("observed needs to be data but got: {}".format(type(data))) - total_size = kwargs.pop('total_size', None) + raise TypeError( + "observed needs to be data but got: {}".format(type(data)) + ) + total_size = kwargs.pop("total_size", None) dist = cls.dist(*args, **kwargs) return model.Var(name, dist, data, total_size) else: raise TypeError("Name needs to be a string but got: {}".format(name)) def __getnewargs__(self): - return _Unpickling, + return (_Unpickling,) @classmethod def dist(cls, *args, **kwargs): @@ -52,8 +68,15 @@ def dist(cls, *args, **kwargs): dist.__init__(*args, **kwargs) return dist - def __init__(self, shape, dtype, testval=None, defaults=(), - transform=None, broadcastable=None): + def __init__( + self, + shape, + dtype, + testval=None, + defaults=(), + transform=None, + broadcastable=None, + ): self.shape = np.atleast_1d(shape) if False in (np.floor(self.shape) == self.shape): raise TypeError("Expected int elements in shape") @@ -75,10 +98,11 @@ def get_test_val(self, val, defaults): return self.getattr_value(val) if val is None: - raise AttributeError("%s has no finite default value to use, " - "checked: %s. Pass testval argument or " - "adjust so value is finite." - % (self, str(defaults))) + raise AttributeError( + "%s has no finite default value to use, " + "checked: %s. Pass testval argument or " + "adjust so value is finite." % (self, str(defaults)) + ) def getattr_value(self, val): if isinstance(val, string_types): @@ -128,20 +152,32 @@ def TensorType(dtype, shape, broadcastable=None): class NoDistribution(Distribution): - - def __init__(self, shape, dtype, testval=None, defaults=(), - transform=None, parent_dist=None, *args, **kwargs): - super(NoDistribution, self).__init__(shape=shape, dtype=dtype, - testval=testval, defaults=defaults, - *args, **kwargs) + def __init__( + self, + shape, + dtype, + testval=None, + defaults=(), + transform=None, + parent_dist=None, + *args, + **kwargs + ): + super(NoDistribution, self).__init__( + shape=shape, + dtype=dtype, + testval=testval, + defaults=defaults, + *args, + **kwargs + ) self.parent_dist = parent_dist def __getattr__(self, name): # Do not use __getstate__ and __setstate__ from parent_dist # to avoid infinite recursion during unpickling - if name.startswith('__'): - raise AttributeError( - "'NoDistribution' has no attribute '%s'" % name) + if name.startswith("__"): + raise AttributeError("'NoDistribution' has no attribute '%s'" % name) return getattr(self.parent_dist, name) def logp(self, x): @@ -151,33 +187,34 @@ def logp(self, x): class Discrete(Distribution): """Base class for discrete distributions""" - def __init__(self, shape=(), dtype=None, defaults=('mode',), - *args, **kwargs): + def __init__(self, shape=(), dtype=None, defaults=("mode",), *args, **kwargs): if dtype is None: - if theano.config.floatX == 'float32': - dtype = 'int16' + if theano.config.floatX == "float32": + dtype = "int16" else: - dtype = 'int64' - if dtype != 'int16' and dtype != 'int64': - raise TypeError('Discrete classes expect dtype to be int16 or int64.') + dtype = "int64" + if dtype != "int16" and dtype != "int64": + raise TypeError("Discrete classes expect dtype to be int16 or int64.") - if kwargs.get('transform', None) is not None: - raise ValueError("Transformations for discrete distributions " - "are not allowed.") + if kwargs.get("transform", None) is not None: + raise ValueError( + "Transformations for discrete distributions " "are not allowed." + ) - super(Discrete, self).__init__( - shape, dtype, defaults=defaults, *args, **kwargs) + super(Discrete, self).__init__(shape, dtype, defaults=defaults, *args, **kwargs) class Continuous(Distribution): """Base class for continuous distributions""" - def __init__(self, shape=(), dtype=None, defaults=('median', 'mean', 'mode'), - *args, **kwargs): + def __init__( + self, shape=(), dtype=None, defaults=("median", "mean", "mode"), *args, **kwargs + ): if dtype is None: dtype = theano.config.floatX super(Continuous, self).__init__( - shape, dtype, defaults=defaults, *args, **kwargs) + shape, dtype, defaults=defaults, *args, **kwargs + ) class DensityDist(Distribution): @@ -198,11 +235,12 @@ class DensityDist(Distribution): """ - def __init__(self, logp, shape=(), dtype=None, testval=0, random=None, *args, **kwargs): + def __init__( + self, logp, shape=(), dtype=None, testval=0, random=None, *args, **kwargs + ): if dtype is None: dtype = theano.config.floatX - super(DensityDist, self).__init__( - shape, dtype, testval, *args, **kwargs) + super(DensityDist, self).__init__(shape, dtype, testval, *args, **kwargs) self.logp = logp self.rand = random @@ -210,8 +248,10 @@ def random(self, *args, **kwargs): if self.rand is not None: return self.rand(*args, **kwargs) else: - raise ValueError("Distribution was not passed any random method " - "Define a custom random method and pass it as kwarg random") + raise ValueError( + "Distribution was not passed any random method " + "Define a custom random method and pass it as kwarg random" + ) def draw_values(params, point=None, size=None): @@ -239,7 +279,7 @@ def draw_values(params, point=None, size=None): named_nodes_parents = {} named_nodes_children = {} for param in params: - if hasattr(param, 'name'): + if hasattr(param, "name"): # Get the named nodes under the `param` node nn, nnp, nnc = get_named_nodes_and_relations(param) leaf_nodes.update(nn) @@ -265,8 +305,7 @@ def draw_values(params, point=None, size=None): if next_ in stored: # If the node already has a givens value, skip it continue - elif isinstance(next_, (tt.TensorConstant, - tt.sharedvar.SharedVariable)): + elif isinstance(next_, (tt.TensorConstant, tt.sharedvar.SharedVariable)): # If the node is a theano.tensor.TensorConstant or a # theano.tensor.sharedvar.SharedVariable, its value will be # available automatically in _compile_theano_function so @@ -287,19 +326,24 @@ def draw_values(params, point=None, size=None): try: # This may fail for autotransformed RVs, which don't # have the random method - givens[next_.name] = (next_, _draw_value(next_, - point=point, - givens=temp_givens, - size=size)) + givens[next_.name] = ( + next_, + _draw_value(next_, point=point, givens=temp_givens, size=size), + ) stored.add(next_.name) except theano.gof.fg.MissingInputError: # The node failed, so we must add the node's parents to # the stack of nodes to try to draw from. We exclude the # nodes in the `params` list. - stack.extend([node for node in named_nodes_parents[next_] - if node is not None and - node.name not in stored and - node not in params]) + stack.extend( + [ + node + for node in named_nodes_parents[next_] + if node is not None + and node.name not in stored + and node not in params + ] + ) # the below makes sure the graph is evaluated in order # test_distributions_random::TestDrawValues::test_draw_order fails without it @@ -309,22 +353,28 @@ def draw_values(params, point=None, size=None): missing_inputs = set(params) while to_eval or missing_inputs: if to_eval == missing_inputs: - raise ValueError('Cannot resolve inputs for {}'.format([str(params[j]) for j in to_eval])) + raise ValueError( + "Cannot resolve inputs for {}".format([str(params[j]) for j in to_eval]) + ) to_eval = set(missing_inputs) missing_inputs = set() for param_idx in to_eval: param = params[param_idx] - if hasattr(param, 'name') and param.name in givens: + if hasattr(param, "name") and param.name in givens: evaluated[param_idx] = givens[param.name][1] else: try: # might evaluate in a bad order, - evaluated[param_idx] = _draw_value(param, point=point, givens=givens.values(), size=size) - if isinstance(param, collections.Hashable) and named_nodes_parents.get(param): + evaluated[param_idx] = _draw_value( + param, point=point, givens=givens.values(), size=size + ) + if isinstance( + param, collections.Hashable + ) and named_nodes_parents.get(param): givens[param.name] = (param, evaluated[param_idx]) except theano.gof.fg.MissingInputError: missing_inputs.add(param_idx) - return [evaluated[j] for j in params] # set the order back + return [evaluated[j] for j in params] # set the order back @memoize @@ -346,10 +396,14 @@ def _compile_theano_function(param, vars, givens=None): A compiled theano function that takes the values of `vars` as input positional args """ - return function(vars, param, givens=givens, - rebuild_strict=True, - on_unused_input='ignore', - allow_input_downcast=True) + return function( + vars, + param, + givens=givens, + rebuild_strict=True, + on_unused_input="ignore", + allow_input_downcast=True, + ) def _draw_value(param, point=None, givens=None, size=None): @@ -378,14 +432,16 @@ def _draw_value(param, point=None, givens=None, size=None): elif isinstance(param, tt.sharedvar.SharedVariable): return param.get_value() elif isinstance(param, (tt.TensorVariable, MultiObservedRV)): - if point and hasattr(param, 'model') and param.name in point: + if point and hasattr(param, "model") and param.name in point: return point[param.name] - elif hasattr(param, 'random') and param.random is not None: + elif hasattr(param, "random") and param.random is not None: return param.random(point=point, size=size) - elif (hasattr(param, 'distribution') and - hasattr(param.distribution, 'random') and - param.distribution.random is not None): - if hasattr(param, 'observations'): + elif ( + hasattr(param, "distribution") + and hasattr(param.distribution, "random") + and param.distribution.random is not None + ): + if hasattr(param, "observations"): # shape inspection for ObservedRV dist_tmp = param.distribution try: @@ -411,11 +467,17 @@ def _draw_value(param, point=None, givens=None, size=None): else: variables = values = [] func = _compile_theano_function(param, variables) - if size and values and not all(var.dshape == val.shape for var, val in zip(variables, values)): + if ( + size + and values + and not all( + var.dshape == val.shape for var, val in zip(variables, values) + ) + ): return np.array([func(*v) for v in zip(*values)]) else: return func(*values) - raise ValueError('Unexpected type in draw_value: %s' % type(param)) + raise ValueError("Unexpected type in draw_value: %s" % type(param)) def to_tuple(shape): @@ -424,15 +486,17 @@ def to_tuple(shape): return tuple() return tuple(np.atleast_1d(shape)) + def _is_one_d(dist_shape): - if hasattr(dist_shape, 'dshape') and dist_shape.dshape in ((), (0,), (1,)): + if hasattr(dist_shape, "dshape") and dist_shape.dshape in ((), (0,), (1,)): return True - elif hasattr(dist_shape, 'shape') and dist_shape.shape in ((), (0,), (1,)): + elif hasattr(dist_shape, "shape") and dist_shape.shape in ((), (0,), (1,)): return True elif to_tuple(dist_shape) == (): return True return False + def generate_samples(generator, *args, **kwargs): """Generate samples from the distribution of a random variable. @@ -462,10 +526,10 @@ def generate_samples(generator, *args, **kwargs): Any remaining *args and **kwargs are passed on to the generator function. """ - dist_shape = kwargs.pop('dist_shape', ()) + dist_shape = kwargs.pop("dist_shape", ()) one_d = _is_one_d(dist_shape) - size = kwargs.pop('size', None) - broadcast_shape = kwargs.pop('broadcast_shape', None) + size = kwargs.pop("size", None) + broadcast_shape = kwargs.pop("broadcast_shape", None) if size is None: size = 1 @@ -481,8 +545,13 @@ def generate_samples(generator, *args, **kwargs): broadcast_shape = np.broadcast(*inputs).shape # size of generator(size=1) except ValueError: # sometimes happens if args have shape (500,) and (500, 4) max_dims = max(j.ndim for j in args + tuple(kwargs.values())) - args = tuple([j.reshape(j.shape + (1,) * (max_dims - j.ndim)) for j in args]) - kwargs = {k: v.reshape(v.shape + (1,) * (max_dims - v.ndim)) for k, v in kwargs.items()} + args = tuple( + [j.reshape(j.shape + (1,) * (max_dims - j.ndim)) for j in args] + ) + kwargs = { + k: v.reshape(v.shape + (1,) * (max_dims - v.ndim)) + for k, v in kwargs.items() + } inputs = args + tuple(kwargs.values()) broadcast_shape = np.broadcast(*inputs).shape # size of generator(size=1) @@ -494,7 +563,7 @@ def generate_samples(generator, *args, **kwargs): if broadcast_shape in {(), (0,), (1,)}: samples = generator(size=size_tup + dist_shape, *args, **kwargs) # Inputs already have the right shape. Just get the right size. - elif broadcast_shape[-len(dist_shape):] == dist_shape or len(dist_shape) == 0: + elif broadcast_shape[-len(dist_shape) :] == dist_shape or len(dist_shape) == 0: if size == 1 or (broadcast_shape == size_tup + dist_shape): samples = generator(size=broadcast_shape, *args, **kwargs) elif dist_shape == broadcast_shape: @@ -502,26 +571,36 @@ def generate_samples(generator, *args, **kwargs): else: samples = None # Args have been broadcast correctly, can just ask for the right shape out - elif dist_shape[-len(broadcast_shape):] == broadcast_shape: + elif dist_shape[-len(broadcast_shape) :] == broadcast_shape: samples = generator(size=size_tup + dist_shape, *args, **kwargs) # Inputs have the right size, have to manually broadcast to the right dist_shape - elif broadcast_shape[:len(size_tup)] == size_tup: - suffix = broadcast_shape[len(size_tup):] + dist_shape - samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix, dtype=int))] + elif broadcast_shape[: len(size_tup)] == size_tup: + suffix = broadcast_shape[len(size_tup) :] + dist_shape + samples = [ + generator(*args, **kwargs).reshape(size_tup + (1,)) + for _ in range(np.prod(suffix, dtype=int)) + ] samples = np.hstack(samples).reshape(size_tup + suffix) else: samples = None if samples is None: - raise TypeError('''Attempted to generate values with incompatible shapes: + raise TypeError( + """Attempted to generate values with incompatible shapes: size: {size} dist_shape: {dist_shape} broadcast_shape: {broadcast_shape} - '''.format(size=size, dist_shape=dist_shape, broadcast_shape=broadcast_shape)) + """.format( + size=size, dist_shape=dist_shape, broadcast_shape=broadcast_shape + ) + ) # reshape samples here if samples.shape[0] == 1 and size == 1: - if len(samples.shape) > len(dist_shape) and samples.shape[-len(dist_shape):] == dist_shape: + if ( + len(samples.shape) > len(dist_shape) + and samples.shape[-len(dist_shape) :] == dist_shape + ): samples = samples.reshape(samples.shape[1:]) if one_d and samples.shape[-1] == 1: diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index 38f34d6c0a..fd7a47936c 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -19,7 +19,7 @@ def all_discrete(comp_dists): class Mixture(Distribution): - R""" + r""" Mixture log-likelihood Often used to model subpopulation heterogeneity @@ -69,23 +69,23 @@ class Mixture(Distribution): """ def __init__(self, w, comp_dists, *args, **kwargs): - shape = kwargs.pop('shape', ()) + shape = kwargs.pop("shape", ()) self.w = w = tt.as_tensor_variable(w) self.comp_dists = comp_dists - defaults = kwargs.pop('defaults', []) + defaults = kwargs.pop("defaults", []) if all_discrete(comp_dists): - dtype = kwargs.pop('dtype', 'int64') + dtype = kwargs.pop("dtype", "int64") else: - dtype = kwargs.pop('dtype', 'float64') + dtype = kwargs.pop("dtype", "float64") try: self.mean = (w * self._comp_means()).sum(axis=-1) - if 'mean' not in defaults: - defaults.append('mean') + if "mean" not in defaults: + defaults.append("mean") except AttributeError: pass @@ -94,13 +94,12 @@ def __init__(self, w, comp_dists, *args, **kwargs): comp_mode_logps = self.logp(comp_modes) self.mode = comp_modes[tt.argmax(w * comp_mode_logps, axis=-1)] - if 'mode' not in defaults: - defaults.append('mode') + if "mode" not in defaults: + defaults.append("mode") except (AttributeError, ValueError, IndexError): pass - super(Mixture, self).__init__(shape, dtype, defaults=defaults, - *args, **kwargs) + super(Mixture, self).__init__(shape, dtype, defaults=defaults, *args, **kwargs) def _comp_logp(self, value): comp_dists = self.comp_dists @@ -110,41 +109,49 @@ def _comp_logp(self, value): return comp_dists.logp(value_) except AttributeError: - return tt.squeeze(tt.stack([comp_dist.logp(value) - for comp_dist in comp_dists], - axis=1)) + return tt.squeeze( + tt.stack([comp_dist.logp(value) for comp_dist in comp_dists], axis=1) + ) def _comp_means(self): try: return tt.as_tensor_variable(self.comp_dists.mean) except AttributeError: - return tt.squeeze(tt.stack([comp_dist.mean - for comp_dist in self.comp_dists], - axis=1)) + return tt.squeeze( + tt.stack([comp_dist.mean for comp_dist in self.comp_dists], axis=1) + ) def _comp_modes(self): try: return tt.as_tensor_variable(self.comp_dists.mode) except AttributeError: - return tt.squeeze(tt.stack([comp_dist.mode - for comp_dist in self.comp_dists], - axis=1)) + return tt.squeeze( + tt.stack([comp_dist.mode for comp_dist in self.comp_dists], axis=1) + ) def _comp_samples(self, point=None, size=None): try: samples = self.comp_dists.random(point=point, size=size) except AttributeError: - samples = np.column_stack([comp_dist.random(point=point, size=size) - for comp_dist in self.comp_dists]) + samples = np.column_stack( + [ + comp_dist.random(point=point, size=size) + for comp_dist in self.comp_dists + ] + ) return np.squeeze(samples) def logp(self, value): w = self.w - return bound(logsumexp(tt.log(w) + self._comp_logp(value), axis=-1), - w >= 0, w <= 1, tt.allclose(w.sum(axis=-1), 1), - broadcast_conditions=False) + return bound( + logsumexp(tt.log(w) + self._comp_logp(value), axis=-1), + w >= 0, + w <= 1, + tt.allclose(w.sum(axis=-1), 1), + broadcast_conditions=False, + ) def random(self, point=None, size=None): w = draw_values([self.w], point=point)[0] @@ -157,26 +164,34 @@ def random(self, point=None, size=None): # Normalize inputs w /= w.sum(axis=-1, keepdims=True) - w_samples = generate_samples(random_choice, - p=w, - broadcast_shape=w.shape[:-1] or (1,), - dist_shape=distshape, - size=size).squeeze() + w_samples = generate_samples( + random_choice, + p=w, + broadcast_shape=w.shape[:-1] or (1,), + dist_shape=distshape, + size=size, + ).squeeze() if (size is None) or (distshape.size == 0): comp_samples = self._comp_samples(point=point, size=size) if comp_samples.ndim > 1: - samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples]) + samples = np.squeeze( + comp_samples[np.arange(w_samples.size), ..., w_samples] + ) else: samples = np.squeeze(comp_samples[w_samples]) else: if w_samples.ndim == 1: - w_samples = np.reshape(np.tile(w_samples, size), (size,) + w_samples.shape) - samples = np.zeros((size,)+tuple(distshape)) + w_samples = np.reshape( + np.tile(w_samples, size), (size,) + w_samples.shape + ) + samples = np.zeros((size,) + tuple(distshape)) for i in range(size): w_tmp = w_samples[i, :] comp_tmp = self._comp_samples(point=point, size=None) if comp_tmp.ndim > 1: - samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp]) + samples[i, :] = np.squeeze( + comp_tmp[np.arange(w_tmp.size), ..., w_tmp] + ) else: samples[i, :] = np.squeeze(comp_tmp[w_tmp]) @@ -184,7 +199,7 @@ def random(self, point=None, size=None): class NormalMixture(Mixture): - R""" + r""" Normal mixture log-likelihood .. math:: @@ -217,14 +232,14 @@ class NormalMixture(Mixture): """ def __init__(self, w, mu, comp_shape=(), *args, **kwargs): - _, sd = get_tau_sd(tau=kwargs.pop('tau', None), - sd=kwargs.pop('sd', None)) + _, sd = get_tau_sd(tau=kwargs.pop("tau", None), sd=kwargs.pop("sd", None)) self.mu = mu = tt.as_tensor_variable(mu) self.sd = sd = tt.as_tensor_variable(sd) - super(NormalMixture, self).__init__(w, Normal.dist(mu, sd=sd, shape=comp_shape), - *args, **kwargs) + super(NormalMixture, self).__init__( + w, Normal.dist(mu, sd=sd, shape=comp_shape), *args, **kwargs + ) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -232,8 +247,7 @@ def _repr_latex_(self, name=None, dist=None): mu = dist.mu w = dist.w sd = dist.sd - name = r'\text{%s}' % name - return r'${} \sim \text{{NormalMixture}}(\mathit{{w}}={},~\mathit{{mu}}={},~\mathit{{sigma}}={})$'.format(name, - get_variable_name(w), - get_variable_name(mu), - get_variable_name(sd)) + name = r"\text{%s}" % name + return r"${} \sim \text{{NormalMixture}}(\mathit{{w}}={},~\mathit{{mu}}={},~\mathit{{sigma}}={})$".format( + name, get_variable_name(w), get_variable_name(mu), get_variable_name(sd) + ) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 5c45d34f8b..d682a86427 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -24,15 +24,24 @@ from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker -__all__ = ['MvNormal', 'MvStudentT', 'Dirichlet', - 'Multinomial', 'Wishart', 'WishartBartlett', - 'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal', - 'KroneckerNormal'] +__all__ = [ + "MvNormal", + "MvStudentT", + "Dirichlet", + "Multinomial", + "Wishart", + "WishartBartlett", + "LKJCorr", + "LKJCholeskyCov", + "MatrixNormal", + "KroneckerNormal", +] class _QuadFormBase(Continuous): - def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, - *args, **kwargs): + def __init__( + self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **kwargs + ): super(_QuadFormBase, self).__init__(*args, **kwargs) if len(self.shape) > 2: raise ValueError("Only 1 or 2 dimensions are allowed.") @@ -40,40 +49,42 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, if chol is not None and not lower: chol = chol.T if len([i for i in [tau, cov, chol] if i is not None]) != 1: - raise ValueError('Incompatible parameterization. ' - 'Specify exactly one of tau, cov, ' - 'or chol.') + raise ValueError( + "Incompatible parameterization. " + "Specify exactly one of tau, cov, " + "or chol." + ) self.mu = mu = tt.as_tensor_variable(mu) self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") # Step methods and advi do not catch LinAlgErrors at the # moment. We work around that by using a cholesky op # that returns a nan as first entry instead of raising # an error. - cholesky = Cholesky(lower=True, on_error='nan') + cholesky = Cholesky(lower=True, on_error="nan") if cov is not None: self.k = cov.shape[0] - self._cov_type = 'cov' + self._cov_type = "cov" cov = tt.as_tensor_variable(cov) if cov.ndim != 2: - raise ValueError('cov must be two dimensional.') + raise ValueError("cov must be two dimensional.") self.chol_cov = cholesky(cov) self.cov = cov self._n = self.cov.shape[-1] elif tau is not None: self.k = tau.shape[0] - self._cov_type = 'tau' + self._cov_type = "tau" tau = tt.as_tensor_variable(tau) if tau.ndim != 2: - raise ValueError('tau must be two dimensional.') + raise ValueError("tau must be two dimensional.") self.chol_tau = cholesky(tau) self.tau = tau self._n = self.tau.shape[-1] else: self.k = chol.shape[0] - self._cov_type = 'chol' + self._cov_type = "chol" if chol.ndim != 2: - raise ValueError('chol must be two dimensional.') + raise ValueError("chol must be two dimensional.") self.chol_cov = tt.as_tensor_variable(chol) self._n = self.chol_cov.shape[-1] @@ -81,7 +92,7 @@ def _quaddist(self, value): """Compute (x - mu).T @ Sigma^-1 @ (x - mu) and the logdet of Sigma.""" mu = self.mu if value.ndim > 2 or value.ndim == 0: - raise ValueError('Invalid dimension for value: %s' % value.ndim) + raise ValueError("Invalid dimension for value: %s" % value.ndim) if value.ndim == 1: onedim = True value = value[None, :] @@ -90,11 +101,11 @@ def _quaddist(self, value): delta = value - mu - if self._cov_type == 'cov': + if self._cov_type == "cov": # Use this when Theano#5908 is released. # return MvNormalLogp()(self.cov, delta) dist, logdet, ok = self._quaddist_cov(delta) - elif self._cov_type == 'tau': + elif self._cov_type == "tau": dist, logdet, ok = self._quaddist_tau(delta) else: dist, logdet, ok = self._quaddist_chol(delta) @@ -140,19 +151,19 @@ def _quaddist_tau(self, delta): def _repr_cov_params(self, dist=None): if dist is None: dist = self - if self._cov_type == 'chol': + if self._cov_type == "chol": chol = get_variable_name(self.chol) - return r'\mathit{{chol}}={}'.format(chol) - elif self._cov_type == 'cov': + return r"\mathit{{chol}}={}".format(chol) + elif self._cov_type == "cov": cov = get_variable_name(self.cov) - return r'\mathit{{cov}}={}'.format(cov) - elif self._cov_type == 'tau': + return r"\mathit{{cov}}={}".format(cov) + elif self._cov_type == "tau": tau = get_variable_name(self.tau) - return r'\mathit{{tau}}={}'.format(tau) + return r"\mathit{{tau}}={}".format(tau) class MvNormal(_QuadFormBase): - R""" + r""" Multivariate normal log-likelihood. .. math:: @@ -218,10 +229,10 @@ class MvNormal(_QuadFormBase): vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T) """ - def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, - *args, **kwargs): - super(MvNormal, self).__init__(mu=mu, cov=cov, tau=tau, chol=chol, - lower=lower, *args, **kwargs) + def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, *args, **kwargs): + super(MvNormal, self).__init__( + mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs + ) self.mean = self.median = self.mode = self.mu = self.mu def random(self, point=None, size=None): @@ -233,19 +244,18 @@ def random(self, point=None, size=None): except TypeError: size = [size] - if self._cov_type == 'cov': + if self._cov_type == "cov": mu, cov = draw_values([self.mu, self.cov], point=point, size=size) if mu.shape[-1] != cov.shape[-1]: raise ValueError("Shapes for mu and cov don't match") try: - dist = stats.multivariate_normal( - mean=mu, cov=cov, allow_singular=True) + dist = stats.multivariate_normal(mean=mu, cov=cov, allow_singular=True) except ValueError: size.append(mu.shape[-1]) return np.nan * np.zeros(size) return dist.rvs(size) - elif self._cov_type == 'chol': + elif self._cov_type == "chol": mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size) if mu.shape[-1] != chol[0].shape[-1]: raise ValueError("Shapes for mu and chol don't match") @@ -265,14 +275,13 @@ def random(self, point=None, size=None): return np.nan * np.zeros(size) standard_normal = np.random.standard_normal(size) - transformed = linalg.solve_triangular( - chol, standard_normal.T, lower=True) + transformed = linalg.solve_triangular(chol, standard_normal.T, lower=True) return mu + transformed.T def logp(self, value): quaddist, logdet, ok = self._quaddist(value) k = value.shape[-1].astype(theano.config.floatX) - norm = - 0.5 * k * pm.floatX(np.log(2 * np.pi)) + norm = -0.5 * k * pm.floatX(np.log(2 * np.pi)) return bound(norm - 0.5 * quaddist - logdet, ok) def _repr_latex_(self, name=None, dist=None): @@ -280,13 +289,13 @@ def _repr_latex_(self, name=None, dist=None): dist = self mu = dist.mu name_mu = get_variable_name(mu) - return (r'${} \sim \text{{MvNormal}}' - r'(\mathit{{mu}}={}, {})$' - .format(name, name_mu, self._repr_cov_params(dist))) + return r"${} \sim \text{{MvNormal}}" r"(\mathit{{mu}}={}, {})$".format( + name, name_mu, self._repr_cov_params(dist) + ) class MvStudentT(_QuadFormBase): - R""" + r""" Multivariate Student-T log-likelihood. .. math:: @@ -326,23 +335,34 @@ class MvStudentT(_QuadFormBase): Whether the cholesky fatcor is given as a lower triangular matrix. """ - def __init__(self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None, - lower=True, *args, **kwargs): + def __init__( + self, + nu, + Sigma=None, + mu=None, + cov=None, + tau=None, + chol=None, + lower=True, + *args, + **kwargs + ): if Sigma is not None: if cov is not None: - raise ValueError('Specify only one of cov and Sigma') + raise ValueError("Specify only one of cov and Sigma") cov = Sigma - super(MvStudentT, self).__init__(mu=mu, cov=cov, tau=tau, chol=chol, - lower=lower, *args, **kwargs) + super(MvStudentT, self).__init__( + mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs + ) self.nu = nu = tt.as_tensor_variable(nu) self.mean = self.median = self.mode = self.mu = self.mu def random(self, point=None, size=None): nu, mu = draw_values([self.nu, self.mu], point=point, size=size) - if self._cov_type == 'cov': + if self._cov_type == "cov": cov, = draw_values([self.cov], point=point, size=size) dist = MvNormal.dist(mu=np.zeros_like(mu), cov=cov) - elif self._cov_type == 'tau': + elif self._cov_type == "tau": tau, = draw_values([self.tau], point=point, size=size) dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau) else: @@ -358,10 +378,12 @@ def logp(self, value): quaddist, logdet, ok = self._quaddist(value) k = value.shape[-1].astype(theano.config.floatX) - norm = (gammaln((self.nu + k) / 2.) - - gammaln(self.nu / 2.) - - 0.5 * k * floatX(np.log(self.nu * np.pi))) - inner = - (self.nu + k) / 2. * tt.log1p(quaddist / self.nu) + norm = ( + gammaln((self.nu + k) / 2.0) + - gammaln(self.nu / 2.0) + - 0.5 * k * floatX(np.log(self.nu * np.pi)) + ) + inner = -(self.nu + k) / 2.0 * tt.log1p(quaddist / self.nu) return bound(norm + inner - logdet, ok) def _repr_latex_(self, name=None, dist=None): @@ -371,14 +393,15 @@ def _repr_latex_(self, name=None, dist=None): nu = dist.nu name_nu = get_variable_name(nu) name_mu = get_variable_name(mu) - return (r'${} \sim \text{{MvStudentT}}' - r'(\mathit{{nu}}={}, \mathit{{mu}}={}, ' - r'{})$' - .format(name, name_nu, name_mu, self._repr_cov_params(dist))) + return ( + r"${} \sim \text{{MvStudentT}}" + r"(\mathit{{nu}}={}, \mathit{{mu}}={}, " + r"{})$".format(name, name_nu, name_mu, self._repr_cov_params(dist)) + ) class Dirichlet(Continuous): - R""" + r""" Dirichlet log-likelihood. .. math:: @@ -401,8 +424,7 @@ class Dirichlet(Continuous): Concentration parameters (a > 0). """ - def __init__(self, a, transform=transforms.stick_breaking, - *args, **kwargs): + def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs): shape = np.atleast_1d(a.shape)[-1] kwargs.setdefault("shape", shape) @@ -413,15 +435,13 @@ def __init__(self, a, transform=transforms.stick_breaking, self.a = a = tt.as_tensor_variable(a) self.mean = a / tt.sum(a) - self.mode = tt.switch(tt.all(a > 1), - (a - 1) / tt.sum(a - 1), - np.nan) + self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan) def _random(self, a, size=None): gen = stats.dirichlet.rvs shape = tuple(np.atleast_1d(self.shape)) - if size[-len(shape):] == shape: - real_size = size[:-len(shape)] + if size[-len(shape) :] == shape: + real_size = size[: -len(shape)] else: real_size = size if self.size_prefix: @@ -440,10 +460,7 @@ def _random(self, a, size=None): def random(self, point=None, size=None): a = draw_values([self.a], point=point, size=size)[0] - samples = generate_samples(self._random, - a=a, - dist_shape=self.shape, - size=size) + samples = generate_samples(self._random, a=a, dist_shape=self.shape, size=size) return samples def logp(self, value): @@ -451,22 +468,27 @@ def logp(self, value): a = self.a # only defined for sum(value) == 1 - return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) - + gammaln(tt.sum(a, axis=-1)), - tt.all(value >= 0), tt.all(value <= 1), - k > 1, tt.all(a > 0), - broadcast_conditions=False) + return bound( + tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + + gammaln(tt.sum(a, axis=-1)), + tt.all(value >= 0), + tt.all(value <= 1), + k > 1, + tt.all(a > 0), + broadcast_conditions=False, + ) def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self a = dist.a - return r'${} \sim \text{{Dirichlet}}(\mathit{{a}}={})$'.format(name, - get_variable_name(a)) + return r"${} \sim \text{{Dirichlet}}(\mathit{{a}}={})$".format( + name, get_variable_name(a) + ) class Multinomial(Discrete): - R""" + r""" Multinomial log-likelihood. Generalizes binomial distribution, but instead of each trial resulting @@ -502,7 +524,7 @@ def __init__(self, n, p, *args, **kwargs): super(Multinomial, self).__init__(*args, **kwargs) p = p / tt.sum(p, axis=-1, keepdims=True) - n = np.squeeze(n) # works also if n is a tensor + n = np.squeeze(n) # works also if n is a tensor if len(self.shape) > 1: m = self.shape[-2] @@ -521,22 +543,23 @@ def __init__(self, n, p, *args, **kwargs): self.p = tt.as_tensor_variable(p) self.mean = self.n * self.p - mode = tt.cast(tt.round(self.mean), 'int32') + mode = tt.cast(tt.round(self.mean), "int32") diff = self.n - tt.sum(mode, axis=-1, keepdims=True) inc_bool_arr = tt.abs_(diff) > 0 - mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], - diff[inc_bool_arr.nonzero()]) + mode = tt.inc_subtensor( + mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()] + ) self.mode = mode def _random(self, n, p, size=None): original_dtype = p.dtype # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317) - p = p.astype('float64') + p = p.astype("float64") # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals if size == p.shape: size = None - elif size[-len(p.shape):] == p.shape: - size = size[:len(size) - len(p.shape)] + elif size[-len(p.shape) :] == p.shape: + size = size[: len(size) - len(p.shape)] n_dim = n.squeeze().ndim @@ -545,32 +568,27 @@ def _random(self, n, p, size=None): randnum = np.random.multinomial(n, p.squeeze(), size=size) elif (n_dim == 0) and (p.ndim > 1): p = p / p.sum(axis=1, keepdims=True) - randnum = np.asarray([ - np.random.multinomial(n.squeeze(), pp, size=size) - for pp in p - ]) + randnum = np.asarray( + [np.random.multinomial(n.squeeze(), pp, size=size) for pp in p] + ) randnum = np.moveaxis(randnum, 1, 0) elif (n_dim > 0) and (p.ndim == 1): p = p / p.sum() - randnum = np.asarray([ - np.random.multinomial(nn, p.squeeze(), size=size) - for nn in n - ]) + randnum = np.asarray( + [np.random.multinomial(nn, p.squeeze(), size=size) for nn in n] + ) randnum = np.moveaxis(randnum, 1, 0) else: p = p / p.sum(axis=1, keepdims=True) - randnum = np.asarray([ - np.random.multinomial(nn, pp, size=size) - for (nn, pp) in zip(n, p) - ]) + randnum = np.asarray( + [np.random.multinomial(nn, pp, size=size) for (nn, pp) in zip(n, p)] + ) randnum = np.moveaxis(randnum, 1, 0) return randnum.astype(original_dtype) def random(self, point=None, size=None): n, p = draw_values([self.n, self.p], point=point, size=size) - samples = generate_samples(self._random, n, p, - dist_shape=self.shape, - size=size) + samples = generate_samples(self._random, n, p, dist_shape=self.shape, size=size) return samples def logp(self, x): @@ -584,7 +602,7 @@ def logp(self, x): tt.all(p <= 1), tt.all(tt.eq(tt.sum(p, axis=-1), 1)), tt.all(tt.ge(n, 0)), - broadcast_conditions=False + broadcast_conditions=False, ) def _repr_latex_(self, name=None, dist=None): @@ -592,9 +610,9 @@ def _repr_latex_(self, name=None, dist=None): dist = self n = dist.n p = dist.p - return r'${} \sim \text{{Multinomial}}(\mathit{{n}}={}, \mathit{{p}}={})$'.format(name, - get_variable_name(n), - get_variable_name(p)) + return r"${} \sim \text{{Multinomial}}(\mathit{{n}}={}, \mathit{{p}}={})$".format( + name, get_variable_name(n), get_variable_name(p) + ) def posdef(AA): @@ -619,7 +637,7 @@ class PosDefMatrix(theano.Op): def make_node(self, x): x = tt.as_tensor_variable(x) assert x.ndim == 2 - o = tt.TensorType(dtype='int8', broadcastable=[])() + o = tt.TensorType(dtype="int8", broadcastable=[])() return theano.Apply(self, [x], [o]) # Python implementation: @@ -628,9 +646,9 @@ def perform(self, node, inputs, outputs): (x,) = inputs (z,) = outputs try: - z[0] = np.array(posdef(x), dtype='int8') + z[0] = np.array(posdef(x), dtype="int8") except Exception: - pm._log.exception('Failed to check if %s positive definite', x) + pm._log.exception("Failed to check if %s positive definite", x) raise def infer_shape(self, node, shapes): @@ -643,11 +661,12 @@ def grad(self, inp, grads): def __str__(self): return "MatrixIsPositiveDefinite" + matrix_pos_def = PosDefMatrix() class Wishart(Continuous): - R""" + r""" Wishart log-likelihood. The Wishart distribution is the probability distribution of the @@ -685,26 +704,27 @@ class Wishart(Continuous): def __init__(self, nu, V, *args, **kwargs): super(Wishart, self).__init__(*args, **kwargs) - warnings.warn('The Wishart distribution can currently not be used ' - 'for MCMC sampling. The probability of sampling a ' - 'symmetric matrix is basically zero. Instead, please ' - 'use LKJCholeskyCov or LKJCorr. For more information ' - 'on the issues surrounding the Wishart see here: ' - 'https://github.com/pymc-devs/pymc3/issues/538.', - UserWarning) + warnings.warn( + "The Wishart distribution can currently not be used " + "for MCMC sampling. The probability of sampling a " + "symmetric matrix is basically zero. Instead, please " + "use LKJCholeskyCov or LKJCorr. For more information " + "on the issues surrounding the Wishart see here: " + "https://github.com/pymc-devs/pymc3/issues/538.", + UserWarning, + ) self.nu = nu = tt.as_tensor_variable(nu) self.p = p = tt.as_tensor_variable(V.shape[0]) self.V = V = tt.as_tensor_variable(V) self.mean = nu * V - self.mode = tt.switch(tt.ge(nu, p + 1), - (nu - p - 1) * V, - np.nan) + self.mode = tt.switch(tt.ge(nu, p + 1), (nu - p - 1) * V, np.nan) def random(self, point=None, size=None): nu, V = draw_values([self.nu, self.V], point=point, size=size) - size= 1 if size is None else size - return generate_samples(stats.wishart.rvs, np.asscalar(nu), V, - broadcast_shape=(size,)) + size = 1 if size is None else size + return generate_samples( + stats.wishart.rvs, np.asscalar(nu), V, broadcast_shape=(size,) + ) def logp(self, X): nu = self.nu @@ -714,14 +734,19 @@ def logp(self, X): IVI = det(V) IXI = det(X) - return bound(((nu - p - 1) * tt.log(IXI) - - trace(matrix_inverse(V).dot(X)) - - nu * p * tt.log(2) - nu * tt.log(IVI) - - 2 * multigammaln(nu / 2., p)) / 2, - matrix_pos_def(X), - tt.eq(X, X.T), - nu > (p - 1), - broadcast_conditions=False + return bound( + ( + (nu - p - 1) * tt.log(IXI) + - trace(matrix_inverse(V).dot(X)) + - nu * p * tt.log(2) + - nu * tt.log(IVI) + - 2 * multigammaln(nu / 2.0, p) + ) + / 2, + matrix_pos_def(X), + tt.eq(X, X.T), + nu > (p - 1), + broadcast_conditions=False, ) def _repr_latex_(self, name=None, dist=None): @@ -729,12 +754,15 @@ def _repr_latex_(self, name=None, dist=None): dist = self nu = dist.nu V = dist.V - return r'${} \sim \text{{Wishart}}(\mathit{{nu}}={}, \mathit{{V}}={})$'.format(name, - get_variable_name(nu), - get_variable_name(V)) + return r"${} \sim \text{{Wishart}}(\mathit{{nu}}={}, \mathit{{V}}={})$".format( + name, get_variable_name(nu), get_variable_name(V) + ) -def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testval=None): - R""" + +def WishartBartlett( + name, S, nu, is_cholesky=False, return_cholesky=False, testval=None +): + r""" Bartlett decomposition of the Wishart distribution. As the Wishart distribution requires the matrix to be symmetric positive semi-definite it is impossible for MCMC to ever propose acceptable matrices. @@ -791,17 +819,20 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv # Inverse transform testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T)) testval = linalg.cholesky(testval, lower=True) - diag_testval = testval[diag_idx]**2 + diag_testval = testval[diag_idx] ** 2 tril_testval = testval[tril_idx] else: diag_testval = None tril_testval = None - c = tt.sqrt(ChiSquared('c', nu - np.arange(2, 2 + n_diag), shape=n_diag, - testval=diag_testval)) - pm._log.info('Added new variable c to model diagonal of Wishart.') - z = Normal('z', 0., 1., shape=n_tril, testval=tril_testval) - pm._log.info('Added new variable z to model off-diagonals of Wishart.') + c = tt.sqrt( + ChiSquared( + "c", nu - np.arange(2, 2 + n_diag), shape=n_diag, testval=diag_testval + ) + ) + pm._log.info("Added new variable c to model diagonal of Wishart.") + z = Normal("z", 0.0, 1.0, shape=n_tril, testval=tril_testval) + pm._log.info("Added new variable z to model off-diagonals of Wishart.") # Construct A matrix A = tt.zeros(S.shape, dtype=np.float32) A = tt.set_subtensor(A[diag_idx], c) @@ -816,25 +847,29 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv def _lkj_normalizing_constant(eta, n): if eta == 1: - result = gammaln(2. * tt.arange(1, int((n - 1) / 2) + 1)).sum() + result = gammaln(2.0 * tt.arange(1, int((n - 1) / 2) + 1)).sum() if n % 2 == 1: - result += (0.25 * (n ** 2 - 1) * tt.log(np.pi) - - 0.25 * (n - 1) ** 2 * tt.log(2.) - - (n - 1) * gammaln(int((n + 1) / 2))) + result += ( + 0.25 * (n ** 2 - 1) * tt.log(np.pi) + - 0.25 * (n - 1) ** 2 * tt.log(2.0) + - (n - 1) * gammaln(int((n + 1) / 2)) + ) else: - result += (0.25 * n * (n - 2) * tt.log(np.pi) - + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.) - + n * gammaln(n / 2) - (n - 1) * gammaln(n)) + result += ( + 0.25 * n * (n - 2) * tt.log(np.pi) + + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.0) + + n * gammaln(n / 2) + - (n - 1) * gammaln(n) + ) else: result = -(n - 1) * gammaln(eta + 0.5 * (n - 1)) k = tt.arange(1, n) - result += (0.5 * k * tt.log(np.pi) - + gammaln(eta + 0.5 * (n - 1 - k))).sum() + result += (0.5 * k * tt.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum() return result class LKJCholeskyCov(Continuous): - R"""Covariance matrix with LKJ distributed correlations. + r"""Covariance matrix with LKJ distributed correlations. This defines a distribution over cholesky decomposed covariance matrices, such that the underlying correlation matrices follow an @@ -941,24 +976,25 @@ class LKJCholeskyCov(Continuous): determinant, URL (version: 2012-04-14): http://math.stackexchange.com/q/130026 """ + def __init__(self, eta, n, sd_dist, *args, **kwargs): self.n = n self.eta = eta - if 'transform' in kwargs: - raise ValueError('Invalid parameter: transform.') - if 'shape' in kwargs: - raise ValueError('Invalid parameter: shape.') + if "transform" in kwargs: + raise ValueError("Invalid parameter: transform.") + if "shape" in kwargs: + raise ValueError("Invalid parameter: shape.") shape = n * (n + 1) // 2 if sd_dist.shape.ndim not in [0, 1]: - raise ValueError('Invalid shape for sd_dist.') + raise ValueError("Invalid shape for sd_dist.") transform = transforms.CholeskyCovPacked(n) - kwargs['shape'] = shape - kwargs['transform'] = transform + kwargs["shape"] = shape + kwargs["transform"] = transform super(LKJCholeskyCov, self).__init__(*args, **kwargs) self.sd_dist = sd_dist @@ -976,8 +1012,8 @@ def logp(self, x): variance = tt.zeros(n) variance = tt.inc_subtensor(variance[0], x[0] ** 2) variance = tt.inc_subtensor( - variance[1:], - cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) + variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]] + ) sd_vals = tt.sqrt(variance) logp_sd = self.sd_dist.logp(sd_vals).sum() @@ -998,7 +1034,7 @@ def logp(self, x): class LKJCorr(Continuous): - R""" + r""" The LKJ (Lewandowski, Kurowicka and Joe) log-likelihood. The LKJ distribution is a prior distribution for correlation matrices. @@ -1039,12 +1075,14 @@ class LKJCorr(Continuous): 100(9), pp.1989-2001. """ - def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwargs): + def __init__(self, eta=None, n=None, p=None, transform="interval", *args, **kwargs): if (p is not None) and (n is not None) and (eta is None): - warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta ' - 'dimension parameter p -> n. Please update your code. ' - 'Automatically re-assigning parameters for backwards compatibility.', - DeprecationWarning) + warnings.warn( + "Parameters to LKJCorr have changed: shape parameter n -> eta " + "dimension parameter p -> n. Please update your code. " + "Automatically re-assigning parameters for backwards compatibility.", + DeprecationWarning, + ) self.n = p self.eta = n eta = self.eta @@ -1053,21 +1091,24 @@ def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwar self.n = n self.eta = eta else: - raise ValueError('Invalid parameter: please use eta as the shape parameter and ' - 'n as the dimension parameter.') + raise ValueError( + "Invalid parameter: please use eta as the shape parameter and " + "n as the dimension parameter." + ) shape = n * (n - 1) // 2 self.mean = floatX(np.zeros(shape)) - if transform == 'interval': + if transform == "interval": transform = transforms.interval(-1, 1) - super(LKJCorr, self).__init__(shape=shape, transform=transform, - *args, **kwargs) - warnings.warn('Parameters in LKJCorr have been rename: shape parameter n -> eta ' - 'dimension parameter p -> n. Please double check your initialization.', - DeprecationWarning) - self.tri_index = np.zeros([n, n], dtype='int32') + super(LKJCorr, self).__init__(shape=shape, transform=transform, *args, **kwargs) + warnings.warn( + "Parameters in LKJCorr have been rename: shape parameter n -> eta " + "dimension parameter p -> n. Please double check your initialization.", + DeprecationWarning, + ) + self.tri_index = np.zeros([n, n], dtype="int32") self.tri_index[np.triu_indices(n, k=1)] = np.arange(shape) self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(shape) @@ -1075,28 +1116,27 @@ def _random(self, n, eta, size=None): size = size if isinstance(size, tuple) else (size,) # original implementation in R see: # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r - beta = eta - 1 + n/2 + beta = eta - 1 + n / 2 r12 = 2 * stats.beta.rvs(a=beta, b=beta, size=size) - 1 P = np.eye(n)[:, :, np.newaxis] * np.ones(size) P[0, 1] = r12 - P[1, 1] = np.sqrt(1 - r12**2) + P[1, 1] = np.sqrt(1 - r12 ** 2) if n > 2: - for m in range(1, n-1): + for m in range(1, n - 1): beta -= 0.5 - y = stats.beta.rvs(a=(m+1) / 2., b=beta, size=size) - z = stats.norm.rvs(loc=0, scale=1, size=(m+1, ) + size) - z = z / np.sqrt(np.einsum('ij,ij->j', z, z)) - P[0:m+1, m+1] = np.sqrt(y) * z - P[m+1, m+1] = np.sqrt(1 - y) - Pt = np.transpose(P, (2, 0 ,1)) - C = np.einsum('...ji,...jk->...ik', Pt, Pt) + y = stats.beta.rvs(a=(m + 1) / 2.0, b=beta, size=size) + z = stats.norm.rvs(loc=0, scale=1, size=(m + 1,) + size) + z = z / np.sqrt(np.einsum("ij,ij->j", z, z)) + P[0 : m + 1, m + 1] = np.sqrt(y) * z + P[m + 1, m + 1] = np.sqrt(1 - y) + Pt = np.transpose(P, (2, 0, 1)) + C = np.einsum("...ji,...jk->...ik", Pt, Pt) return C.transpose((1, 2, 0))[np.triu_indices(n, k=1)].T def random(self, point=None, size=None): n, eta = draw_values([self.n, self.eta], point=point, size=size) - size= 1 if size is None else size - samples = generate_samples(self._random, n, eta, - broadcast_shape=(size,)) + size = 1 if size is None else size + samples = generate_samples(self._random, n, eta, broadcast_shape=(size,)) return samples def logp(self, x): @@ -1107,17 +1147,19 @@ def logp(self, x): X = tt.fill_diagonal(X, 1) result = _lkj_normalizing_constant(eta, n) - result += (eta - 1.) * tt.log(det(X)) - return bound(result, - tt.all(X <= 1), tt.all(X >= -1), - matrix_pos_def(X), - eta > 0, - broadcast_conditions=False + result += (eta - 1.0) * tt.log(det(X)) + return bound( + result, + tt.all(X <= 1), + tt.all(X >= -1), + matrix_pos_def(X), + eta > 0, + broadcast_conditions=False, ) class MatrixNormal(Continuous): - R""" + r""" Matrix-valued normal log-likelihood. .. math:: @@ -1205,12 +1247,22 @@ class MatrixNormal(Continuous): observed=data, shape=(m, n)) """ - def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None, - colcov=None, colchol=None, coltau=None, shape=None, *args, - **kwargs): + def __init__( + self, + mu=0, + rowcov=None, + rowchol=None, + rowtau=None, + colcov=None, + colchol=None, + coltau=None, + shape=None, + *args, + **kwargs + ): self._setup_matrices(colcov, colchol, coltau, rowcov, rowchol, rowtau) if shape is None: - raise TypeError('shape is a required argument') + raise TypeError("shape is a required argument") assert len(shape) == 2, "shape must have length 2: mxn" self.shape = shape super(MatrixNormal, self).__init__(shape=shape, *args, **kwargs) @@ -1220,93 +1272,106 @@ def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None, self.solve_upper = tt.slinalg.solve_upper_triangular def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): - cholesky = Cholesky(lower=True, on_error='raise') + cholesky = Cholesky(lower=True, on_error="raise") # Among-row matrices if len([i for i in [rowtau, rowcov, rowchol] if i is not None]) != 1: - raise ValueError('Incompatible parameterization. ' - 'Specify exactly one of rowtau, rowcov, ' - 'or rowchol.') + raise ValueError( + "Incompatible parameterization. " + "Specify exactly one of rowtau, rowcov, " + "or rowchol." + ) if rowcov is not None: self.m = rowcov.shape[0] - self._rowcov_type = 'cov' + self._rowcov_type = "cov" rowcov = tt.as_tensor_variable(rowcov) if rowcov.ndim != 2: - raise ValueError('rowcov must be two dimensional.') + raise ValueError("rowcov must be two dimensional.") self.rowchol_cov = cholesky(rowcov) self.rowcov = rowcov elif rowtau is not None: - raise ValueError('rowtau not supported at this time') + raise ValueError("rowtau not supported at this time") self.m = rowtau.shape[0] - self._rowcov_type = 'tau' + self._rowcov_type = "tau" rowtau = tt.as_tensor_variable(rowtau) if rowtau.ndim != 2: - raise ValueError('rowtau must be two dimensional.') + raise ValueError("rowtau must be two dimensional.") self.rowchol_tau = cholesky(rowtau) self.rowtau = rowtau else: self.m = rowchol.shape[0] - self._rowcov_type = 'chol' + self._rowcov_type = "chol" if rowchol.ndim != 2: - raise ValueError('rowchol must be two dimensional.') + raise ValueError("rowchol must be two dimensional.") self.rowchol_cov = tt.as_tensor_variable(rowchol) # Among-column matrices if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1: - raise ValueError('Incompatible parameterization. ' - 'Specify exactly one of coltau, colcov, ' - 'or colchol.') + raise ValueError( + "Incompatible parameterization. " + "Specify exactly one of coltau, colcov, " + "or colchol." + ) if colcov is not None: self.n = colcov.shape[0] - self._colcov_type = 'cov' + self._colcov_type = "cov" colcov = tt.as_tensor_variable(colcov) if colcov.ndim != 2: - raise ValueError('colcov must be two dimensional.') + raise ValueError("colcov must be two dimensional.") self.colchol_cov = cholesky(colcov) self.colcov = colcov elif coltau is not None: - raise ValueError('coltau not supported at this time') + raise ValueError("coltau not supported at this time") self.n = coltau.shape[0] - self._colcov_type = 'tau' + self._colcov_type = "tau" coltau = tt.as_tensor_variable(coltau) if coltau.ndim != 2: - raise ValueError('coltau must be two dimensional.') + raise ValueError("coltau must be two dimensional.") self.colchol_tau = cholesky(coltau) self.coltau = coltau else: self.n = colchol.shape[0] - self._colcov_type = 'chol' + self._colcov_type = "chol" if colchol.ndim != 2: - raise ValueError('colchol must be two dimensional.') + raise ValueError("colchol must be two dimensional.") self.colchol_cov = tt.as_tensor_variable(colchol) def random(self, point=None, size=None): mu, colchol, rowchol = draw_values( - [self.mu, self.colchol_cov, self.rowchol_cov], - point=point, - size=size) + [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size + ) if size is None: size = () if size in (None, ()): - standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1])) + standard_normal = np.random.standard_normal( + (self.shape[0], colchol.shape[-1]) + ) samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T)) else: samples = [] size = tuple(np.atleast_1d(size)) if mu.shape == tuple(self.shape): for _ in range(np.prod(size)): - standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1])) - samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))) + standard_normal = np.random.standard_normal( + (self.shape[0], colchol.shape[-1]) + ) + samples.append( + mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T)) + ) else: for j in range(np.prod(size)): - standard_normal = np.random.standard_normal((self.shape[0], colchol[j].shape[-1])) - samples.append(mu[j] + - np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T))) + standard_normal = np.random.standard_normal( + (self.shape[0], colchol[j].shape[-1]) + ) + samples.append( + mu[j] + + np.matmul( + rowchol[j], np.matmul(standard_normal, colchol[j].T) + ) + ) samples = np.array(samples).reshape(size + tuple(self.shape)) return samples - - def _trquaddist(self, value): """Compute Tr[colcov^-1 @ (x - mu).T @ rowcov^-1 @ (x - mu)] and the logdet of colcov and rowcov.""" @@ -1332,12 +1397,12 @@ def logp(self, value): trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value) m = self.m n = self.n - norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi)) - return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet + norm = -0.5 * m * n * pm.floatX(np.log(2 * np.pi)) + return norm - 0.5 * trquaddist - m * half_collogdet - n * half_rowlogdet class KroneckerNormal(Continuous): - R""" + r""" Multivariate normal log-likelihood with Kronecker-structured covariance. .. math:: @@ -1425,24 +1490,27 @@ class KroneckerNormal(Continuous): .. [1] Saatchi, Y. (2011). "Scalable inference for structured Gaussian process models" """ - def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, - *args, **kwargs): + def __init__( + self, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs + ): self._setup(covs, chols, evds, sigma) super(KroneckerNormal, self).__init__(*args, **kwargs) self.mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu def _setup(self, covs, chols, evds, sigma): - self.cholesky = Cholesky(lower=True, on_error='raise') + self.cholesky = Cholesky(lower=True, on_error="raise") if len([i for i in [covs, chols, evds] if i is not None]) != 1: - raise ValueError('Incompatible parameterization. ' - 'Specify exactly one of covs, chols, ' - 'or evds.') + raise ValueError( + "Incompatible parameterization. " + "Specify exactly one of covs, chols, " + "or evds." + ) self._isEVD = False self.sigma = sigma self.is_noisy = self.sigma is not None and self.sigma != 0 if covs is not None: - self._cov_type = 'cov' + self._cov_type = "cov" self.covs = covs if self.is_noisy: # Noise requires eigendecomposition @@ -1453,10 +1521,11 @@ def _setup(self, covs, chols, evds, sigma): self.chols = list(map(self.cholesky, self.covs)) self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) self.sizes = tt.as_tensor_variable( - [chol.shape[0] for chol in self.chols]) + [chol.shape[0] for chol in self.chols] + ) self.N = tt.prod(self.sizes) elif chols is not None: - self._cov_type = 'chol' + self._cov_type = "chol" if self.is_noisy: # A strange case... # Noise requires eigendecomposition covs = [tt.dot(chol, chol.T) for chol in chols] @@ -1466,10 +1535,11 @@ def _setup(self, covs, chols, evds, sigma): self.chols = chols self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) self.sizes = tt.as_tensor_variable( - [chol.shape[0] for chol in self.chols]) + [chol.shape[0] for chol in self.chols] + ) self.N = tt.prod(self.sizes) else: - self._cov_type = 'evd' + self._cov_type = "evd" self._setup_evd(evds) def _setup_evd(self, eigh_iterable): @@ -1481,18 +1551,18 @@ def _setup_evd(self, eigh_iterable): self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep)) self.eigs = kron_diag(*self.eigs_sep) # Combine separate eigs if self.is_noisy: - self.eigs += self.sigma**2 + self.eigs += self.sigma ** 2 self.N = self.eigs.shape[0] def _setup_random(self): - if not hasattr(self, 'mv_params'): - self.mv_params = {'mu': self.mu} - if self._cov_type == 'cov': + if not hasattr(self, "mv_params"): + self.mv_params = {"mu": self.mu} + if self._cov_type == "cov": cov = kronecker(*self.covs) if self.is_noisy: - cov = cov + self.sigma**2 * tt.identity_like(cov) - self.mv_params['cov'] = cov - elif self._cov_type == 'chol': + cov = cov + self.sigma ** 2 * tt.identity_like(cov) + self.mv_params["cov"] = cov + elif self._cov_type == "chol": if self.is_noisy: covs = [] for eig, Q in zip(self.eigs_sep, self.Qs): @@ -1500,19 +1570,19 @@ def _setup_random(self): covs.append(cov_i) cov = kronecker(*covs) if self.is_noisy: - cov = cov + self.sigma**2 * tt.identity_like(cov) - self.mv_params['chol'] = self.cholesky(cov) + cov = cov + self.sigma ** 2 * tt.identity_like(cov) + self.mv_params["chol"] = self.cholesky(cov) else: - self.mv_params['chol'] = kronecker(*self.chols) - elif self._cov_type == 'evd': + self.mv_params["chol"] = kronecker(*self.chols) + elif self._cov_type == "evd": covs = [] for eig, Q in zip(self.eigs_sep, self.Qs): cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) covs.append(cov_i) cov = kronecker(*covs) if self.is_noisy: - cov = cov + self.sigma**2 * tt.identity_like(cov) - self.mv_params['cov'] = cov + cov = cov + self.sigma ** 2 * tt.identity_like(cov) + self.mv_params["cov"] = cov def random(self, point=None, size=None): # Expand params into terms MvNormal can understand to force consistency @@ -1523,7 +1593,7 @@ def random(self, point=None, size=None): def _quaddist(self, value): """Computes the quadratic (x-mu)^T @ K^-1 @ (x-mu) and log(det(K))""" if value.ndim > 2 or value.ndim == 0: - raise ValueError('Invalid dimension for value: %s' % value.ndim) + raise ValueError("Invalid dimension for value: %s" % value.ndim) if value.ndim == 1: onedim = True value = value[None, :] @@ -1533,14 +1603,14 @@ def _quaddist(self, value): delta = value - self.mu if self._isEVD: sqrt_quad = kron_dot(self.QTs, delta.T) - sqrt_quad = sqrt_quad/tt.sqrt(self.eigs[:, None]) + sqrt_quad = sqrt_quad / tt.sqrt(self.eigs[:, None]) logdet = tt.sum(tt.log(self.eigs)) else: sqrt_quad = kron_solve_lower(self.chols, delta.T) logdet = 0 for chol_size, chol_diag in zip(self.sizes, self.chol_diags): - logchol = tt.log(chol_diag) * self.N/chol_size - logdet += tt.sum(2*logchol) + logchol = tt.log(chol_diag) * self.N / chol_size + logdet += tt.sum(2 * logchol) # Square each sample quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T) if onedim: @@ -1549,4 +1619,4 @@ def _quaddist(self, value): def logp(self, value): quad, logdet = self._quaddist(value) - return - (quad + logdet + self.N*tt.log(2*np.pi)) / 2.0 + return -(quad + logdet + self.N * tt.log(2 * np.pi)) / 2.0 diff --git a/pymc3/distributions/special.py b/pymc3/distributions/special.py index 83d697ccf3..c53dbc5dbf 100644 --- a/pymc3/distributions/special.py +++ b/pymc3/distributions/special.py @@ -3,10 +3,10 @@ from theano.scalar.basic_scipy import GammaLn, Psi from theano import scalar -__all__ = ['gammaln', 'multigammaln', 'psi', 'log_i0'] +__all__ = ["gammaln", "multigammaln", "psi", "log_i0"] -scalar_gammaln = GammaLn(scalar.upgrade_to_float, name='scalar_gammaln') -gammaln = tt.Elemwise(scalar_gammaln, name='gammaln') +scalar_gammaln = GammaLn(scalar.upgrade_to_float, name="scalar_gammaln") +gammaln = tt.Elemwise(scalar_gammaln, name="gammaln") def multigammaln(a, p): @@ -19,21 +19,35 @@ def multigammaln(a, p): degrees of freedom. p > 0 """ i = tt.arange(1, p + 1) - return (p * (p - 1) * tt.log(np.pi) / 4. - + tt.sum(gammaln(a + (1. - i) / 2.), axis=0)) + return p * (p - 1) * tt.log(np.pi) / 4.0 + tt.sum( + gammaln(a + (1.0 - i) / 2.0), axis=0 + ) def log_i0(x): """ Calculates the logarithm of the 0 order modified Bessel function of the first kind"" """ - return tt.switch(tt.lt(x, 5), tt.log1p(x**2. / 4. + x**4. / 64. + x**6. / 2304. - + x**8. / 147456. + x**10. / 14745600. - + x**12. / 2123366400.), - x - 0.5 * tt.log(2. * np.pi * x) + tt.log1p(1. / (8. * x) - + 9. / (128. * x**2.) + 225. / (3072. * x**3.) - + 11025. / (98304. * x**4.))) - - -scalar_psi = Psi(scalar.upgrade_to_float, name='scalar_psi') -psi = tt.Elemwise(scalar_psi, name='psi') + return tt.switch( + tt.lt(x, 5), + tt.log1p( + x ** 2.0 / 4.0 + + x ** 4.0 / 64.0 + + x ** 6.0 / 2304.0 + + x ** 8.0 / 147456.0 + + x ** 10.0 / 14745600.0 + + x ** 12.0 / 2123366400.0 + ), + x + - 0.5 * tt.log(2.0 * np.pi * x) + + tt.log1p( + 1.0 / (8.0 * x) + + 9.0 / (128.0 * x ** 2.0) + + 225.0 / (3072.0 * x ** 3.0) + + 11025.0 / (98304.0 * x ** 4.0) + ), + ) + + +scalar_psi = Psi(scalar.upgrade_to_float, name="scalar_psi") +psi = tt.Elemwise(scalar_psi, name="psi") diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index 9c5847271a..afbb3c9b8c 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -8,13 +8,13 @@ __all__ = [ - 'AR1', - 'AR', - 'GaussianRandomWalk', - 'GARCH11', - 'EulerMaruyama', - 'MvGaussianRandomWalk', - 'MvStudentTRandomWalk' + "AR1", + "AR", + "GaussianRandomWalk", + "GARCH11", + "EulerMaruyama", + "MvGaussianRandomWalk", + "MvStudentTRandomWalk", ] @@ -35,7 +35,7 @@ def __init__(self, k, tau_e, *args, **kwargs): self.k = k = tt.as_tensor_variable(k) self.tau_e = tau_e = tt.as_tensor_variable(tau_e) self.tau = tau_e * (1 - k ** 2) - self.mode = tt.as_tensor_variable(0.) + self.mode = tt.as_tensor_variable(0.0) def logp(self, x): k = self.k @@ -43,7 +43,7 @@ def logp(self, x): x_im1 = x[:-1] x_i = x[1:] - boundary = Normal.dist(0., tau=tau_e).logp + boundary = Normal.dist(0.0, tau=tau_e).logp innov_like = Normal.dist(k * x_im1, tau=tau_e).logp(x_i) return boundary(x[0]) + tt.sum(innov_like) @@ -53,13 +53,14 @@ def _repr_latex_(self, name=None, dist=None): dist = self k = dist.k tau_e = dist.tau_e - name = r'\text{%s}' % name - return r'${} \sim \text{{AR1}}(\mathit{{k}}={},~\mathit{{tau_e}}={})$'.format(name, - get_variable_name(k), get_variable_name(tau_e)) + name = r"\text{%s}" % name + return r"${} \sim \text{{AR1}}(\mathit{{k}}={},~\mathit{{tau_e}}={})$".format( + name, get_variable_name(k), get_variable_name(tau_e) + ) class AR(distribution.Continuous): - R""" + r""" Autoregressive process with p lags. .. math:: @@ -89,16 +90,16 @@ class AR(distribution.Continuous): distribution for initial values (Defaults to Flat()) """ - def __init__(self, rho, sd=None, tau=None, - constant=False, init=Flat.dist(), - *args, **kwargs): + def __init__( + self, rho, sd=None, tau=None, constant=False, init=Flat.dist(), *args, **kwargs + ): super(AR, self).__init__(*args, **kwargs) tau, sd = get_tau_sd(tau=tau, sd=sd) self.sd = tt.as_tensor_variable(sd) self.tau = tt.as_tensor_variable(tau) - self.mean = tt.as_tensor_variable(0.) + self.mean = tt.as_tensor_variable(0.0) if isinstance(rho, list): p = len(rho) @@ -124,23 +125,33 @@ def __init__(self, rho, sd=None, tau=None, def logp(self, value): if self.constant: - x = tt.add(*[self.rho[i + 1] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)]) - eps = value[self.p:] - self.rho[0] - x + x = tt.add( + *[ + self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)] + for i in range(self.p) + ] + ) + eps = value[self.p :] - self.rho[0] - x else: if self.p == 1: x = self.rho * value[:-1] else: - x = tt.add(*[self.rho[i] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)]) - eps = value[self.p:] - x + x = tt.add( + *[ + self.rho[i] * value[self.p - (i + 1) : -(i + 1)] + for i in range(self.p) + ] + ) + eps = value[self.p :] - x innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps) - init_like = self.init.logp(value[:self.p]) + init_like = self.init.logp(value[: self.p]) return tt.sum(innov_like) + tt.sum(init_like) class GaussianRandomWalk(distribution.Continuous): - R""" + r""" Random Walk with Normal innovations Parameters @@ -155,15 +166,14 @@ class GaussianRandomWalk(distribution.Continuous): distribution for initial value (Defaults to Flat()) """ - def __init__(self, tau=None, init=Flat.dist(), sd=None, mu=0., - *args, **kwargs): + def __init__(self, tau=None, init=Flat.dist(), sd=None, mu=0.0, *args, **kwargs): super(GaussianRandomWalk, self).__init__(*args, **kwargs) tau, sd = get_tau_sd(tau=tau, sd=sd) self.tau = tau = tt.as_tensor_variable(tau) self.sd = sd = tt.as_tensor_variable(sd) self.mu = mu = tt.as_tensor_variable(mu) self.init = init - self.mean = tt.as_tensor_variable(0.) + self.mean = tt.as_tensor_variable(0.0) def logp(self, x): tau = self.tau @@ -182,14 +192,14 @@ def _repr_latex_(self, name=None, dist=None): dist = self mu = dist.mu sd = dist.sd - name = r'\text{%s}' % name - return r'${} \sim \text{{GaussianRandomWalk}}(\mathit{{mu}}={},~\mathit{{sd}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(sd)) + name = r"\text{%s}" % name + return r"${} \sim \text{{GaussianRandomWalk}}(\mathit{{mu}}={},~\mathit{{sd}}={})$".format( + name, get_variable_name(mu), get_variable_name(sd) + ) class GARCH11(distribution.Continuous): - R""" + r""" GARCH(1,1) with Normal innovations. The model is specified by .. math:: @@ -212,15 +222,14 @@ class GARCH11(distribution.Continuous): initial_vol >= 0, initial volatility, sigma_0 """ - def __init__(self, omega, alpha_1, beta_1, - initial_vol, *args, **kwargs): + def __init__(self, omega, alpha_1, beta_1, initial_vol, *args, **kwargs): super(GARCH11, self).__init__(*args, **kwargs) self.omega = omega = tt.as_tensor_variable(omega) self.alpha_1 = alpha_1 = tt.as_tensor_variable(alpha_1) self.beta_1 = beta_1 = tt.as_tensor_variable(beta_1) self.initial_vol = tt.as_tensor_variable(initial_vol) - self.mean = tt.as_tensor_variable(0.) + self.mean = tt.as_tensor_variable(0.0) def get_volatility(self, x): x = x[:-1] @@ -228,16 +237,17 @@ def get_volatility(self, x): def volatility_update(x, vol, w, a, b): return tt.sqrt(w + a * tt.square(x) + b * tt.square(vol)) - vol, _ = scan(fn=volatility_update, - sequences=[x], - outputs_info=[self.initial_vol], - non_sequences=[self.omega, self.alpha_1, - self.beta_1]) + vol, _ = scan( + fn=volatility_update, + sequences=[x], + outputs_info=[self.initial_vol], + non_sequences=[self.omega, self.alpha_1, self.beta_1], + ) return tt.concatenate([[self.initial_vol], vol]) def logp(self, x): vol = self.get_volatility(x) - return tt.sum(Normal.dist(0., sd=vol).logp(x)) + return tt.sum(Normal.dist(0.0, sd=vol).logp(x)) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -245,16 +255,17 @@ def _repr_latex_(self, name=None, dist=None): omega = dist.omega alpha_1 = dist.alpha_1 beta_1 = dist.beta_1 - name = r'\text{%s}' % name - return r'${} \sim \text{GARCH}(1,~1,~\mathit{{omega}}={},~\mathit{{alpha_1}}={},~\mathit{{beta_1}}={})$'.format( + name = r"\text{%s}" % name + return r"${} \sim \text{GARCH}(1,~1,~\mathit{{omega}}={},~\mathit{{alpha_1}}={},~\mathit{{beta_1}}={})$".format( name, get_variable_name(omega), get_variable_name(alpha_1), - get_variable_name(beta_1)) + get_variable_name(beta_1), + ) class EulerMaruyama(distribution.Continuous): - R""" + r""" Stochastic differential equation discretized with the Euler-Maruyama method. Parameters @@ -266,6 +277,7 @@ class EulerMaruyama(distribution.Continuous): sde_pars : tuple parameters of the SDE, passed as *args to sde_fn """ + def __init__(self, dt, sde_fn, sde_pars, *args, **kwds): super(EulerMaruyama, self).__init__(*args, **kwds) self.dt = dt = tt.as_tensor_variable(dt) @@ -283,14 +295,14 @@ def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self dt = dist.dt - name = r'\text{%s}' % name - return r'${} \sim \text{EulerMaruyama}(\mathit{{dt}}={})$'.format(name, - get_variable_name(dt)) - + name = r"\text{%s}" % name + return r"${} \sim \text{EulerMaruyama}(\mathit{{dt}}={})$".format( + name, get_variable_name(dt) + ) class MvGaussianRandomWalk(distribution.Continuous): - R""" + r""" Multivariate Random Walk with Normal innovations Parameters @@ -311,14 +323,24 @@ class MvGaussianRandomWalk(distribution.Continuous): Only one of cov, tau or chol is required. """ - def __init__(self, mu=0., cov=None, tau=None, chol=None, lower=True, init=Flat.dist(), - *args, **kwargs): + + def __init__( + self, + mu=0.0, + cov=None, + tau=None, + chol=None, + lower=True, + init=Flat.dist(), + *args, + **kwargs + ): super(MvGaussianRandomWalk, self).__init__(*args, **kwargs) self.init = init self.innovArgs = (mu, cov, tau, chol, lower) self.innov = multivariate.MvNormal.dist(*self.innovArgs) - self.mean = tt.as_tensor_variable(0.) + self.mean = tt.as_tensor_variable(0.0) def logp(self, x): x_im1 = x[:-1] @@ -331,14 +353,14 @@ def _repr_latex_(self, name=None, dist=None): dist = self mu = dist.innov.mu cov = dist.innov.cov - name = r'\text{%s}' % name - return r'${} \sim \text{MvGaussianRandomWalk}(\mathit{{mu}}={},~\mathit{{cov}}={})$'.format(name, - get_variable_name(mu), - get_variable_name(cov)) + name = r"\text{%s}" % name + return r"${} \sim \text{MvGaussianRandomWalk}(\mathit{{mu}}={},~\mathit{{cov}}={})$".format( + name, get_variable_name(mu), get_variable_name(cov) + ) class MvStudentTRandomWalk(MvGaussianRandomWalk): - R""" + r""" Multivariate Random Walk with StudentT innovations Parameters @@ -355,6 +377,7 @@ class MvStudentTRandomWalk(MvGaussianRandomWalk): init : distribution distribution for initial value (Defaults to Flat()) """ + def __init__(self, nu, *args, **kwargs): super(MvStudentTRandomWalk, self).__init__(*args, **kwargs) self.nu = tt.as_tensor_variable(nu) @@ -366,8 +389,7 @@ def _repr_latex_(self, name=None, dist=None): nu = dist.innov.nu mu = dist.innov.mu cov = dist.innov.cov - name = r'\text{%s}' % name - return r'${} \sim \text{MvStudentTRandomWalk}(\mathit{{nu}}={},~\mathit{{mu}}={},~\mathit{{cov}}={})$'.format(name, - get_variable_name(nu), - get_variable_name(mu), - get_variable_name(cov)) + name = r"\text{%s}" % name + return r"${} \sim \text{MvStudentTRandomWalk}(\mathit{{nu}}={},~\mathit{{mu}}={},~\mathit{{cov}}={})$".format( + name, get_variable_name(nu), get_variable_name(mu), get_variable_name(cov) + ) diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 682b3426d7..d89f4ea65a 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -9,9 +9,19 @@ import numpy as np from scipy.special import logit as nplogit -__all__ = ['transform', 'stick_breaking', 'logodds', 'interval', 'log_exp_m1', - 'lowerbound', 'upperbound', 'ordered', 'log', 'sum_to_1', - 't_stick_breaking'] +__all__ = [ + "transform", + "stick_breaking", + "logodds", + "interval", + "log_exp_m1", + "lowerbound", + "upperbound", + "ordered", + "log", + "sum_to_1", + "t_stick_breaking", +] class Transform(object): @@ -21,6 +31,7 @@ class Transform(object): ---------- name : str """ + name = "" def forward(self, x): @@ -99,7 +110,6 @@ def __str__(self): class ElemwiseTransform(Transform): - def jacobian_det(self, x): grad = tt.reshape(gradient(tt.sum(self.backward(x)), [x]), x.shape) return tt.log(tt.abs_(grad)) @@ -122,14 +132,14 @@ def __init__(self, dist, transform, *args, **kwargs): self.dist = dist self.transform_used = transform - v = forward(FreeRV(name='v', distribution=dist)) + v = forward(FreeRV(name="v", distribution=dist)) self.type = v.type super(TransformedDistribution, self).__init__( - v.shape.tag.test_value, v.dtype, - testval, dist.defaults, *args, **kwargs) + v.shape.tag.test_value, v.dtype, testval, dist.defaults, *args, **kwargs + ) - if transform.name == 'stickbreaking': + if transform.name == "stickbreaking": b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False)) # force the last dim not broadcastable self.type = tt.TensorType(v.dtype, b) @@ -144,6 +154,7 @@ def logp(self, x): def logp_nojac(self, x): return self.dist.logp(self.transform_used.backward(x)) + transform = Transform @@ -162,6 +173,7 @@ def forward_val(self, x, point=None): def jacobian_det(self, x): return x + log = Log() @@ -176,14 +188,15 @@ def forward(self, x): y = Log(Exp(x) - 1) = Log(1 - Exp(-x)) + x """ - return tt.log(1.-tt.exp(-x)) + x + return tt.log(1.0 - tt.exp(-x)) + x def forward_val(self, x, point=None): - return np.log(1.-np.exp(-x)) + x + return np.log(1.0 - np.exp(-x)) + x def jacobian_det(self, x): return -tt.nnet.softplus(-x) + log_exp_m1 = LogExpM1() @@ -199,6 +212,7 @@ def forward(self, x): def forward_val(self, x, point=None): return nplogit(x) + logodds = LogOdds() @@ -224,14 +238,14 @@ def forward_val(self, x, point=None): # 2017-06-19 # the `self.a-0.` below is important for the testval to propagates # For an explanation see pull/2328#issuecomment-309303811 - a, b = draw_values([self.a-0., self.b-0.], - point=point) + a, b = draw_values([self.a - 0.0, self.b - 0.0], point=point) return floatX(np.log(x - a) - np.log(b - x)) def jacobian_det(self, x): s = tt.nnet.softplus(-x) return tt.log(self.b - self.a) - 2 * s - x + interval = Interval @@ -256,13 +270,13 @@ def forward_val(self, x, point=None): # 2017-06-19 # the `self.a-0.` below is important for the testval to propagates # For an explanation see pull/2328#issuecomment-309303811 - a = draw_values([self.a-0.], - point=point)[0] + a = draw_values([self.a - 0.0], point=point)[0] return floatX(np.log(x - a)) def jacobian_det(self, x): return x + lowerbound = LowerBound @@ -287,13 +301,13 @@ def forward_val(self, x, point=None): # 2017-06-19 # the `self.b-0.` below is important for the testval to propagates # For an explanation see pull/2328#issuecomment-309303811 - b = draw_values([self.b-0.], - point=point)[0] + b = draw_values([self.b - 0.0], point=point)[0] return floatX(np.log(b - x)) def jacobian_det(self, x): return x + upperbound = UpperBound @@ -321,6 +335,7 @@ def forward_val(self, x, point=None): def jacobian_det(self, y): return tt.sum(y[..., 1:], axis=-1) + ordered = Ordered() @@ -329,6 +344,7 @@ class SumTo1(Transform): Transforms K dimensional simplex space (values in [0,1] and sum to 1) to K - 1 vector of values in [0,1] This Transformation operates on the last dimension of the input tensor. """ + name = "sumto1" def backward(self, y): @@ -345,6 +361,7 @@ def jacobian_det(self, x): y = tt.zeros(x.shape) return tt.sum(y, axis=-1) + sum_to_1 = SumTo1() @@ -371,8 +388,8 @@ def forward(self, x_): s = tt.extra_ops.cumsum(x0[::-1], 0)[::-1] + x[-1] z = x0 / s Km1 = x.shape[0] - 1 - k = tt.arange(Km1)[(slice(None), ) + (None, ) * (x.ndim - 1)] - eq_share = logit(1. / (Km1 + 1 - k).astype(str(x_.dtype))) + k = tt.arange(Km1)[(slice(None),) + (None,) * (x.ndim - 1)] + eq_share = logit(1.0 / (Km1 + 1 - k).astype(str(x_.dtype))) y = logit(z) - eq_share return floatX(y.T) @@ -384,15 +401,15 @@ def forward_val(self, x_, point=None): z = x0 / s Km1 = x.shape[0] - 1 k = np.arange(Km1)[(slice(None),) + (None,) * (x.ndim - 1)] - eq_share = nplogit(1. / (Km1 + 1 - k).astype(str(x_.dtype))) + eq_share = nplogit(1.0 / (Km1 + 1 - k).astype(str(x_.dtype))) y = nplogit(z) - eq_share return floatX(y.T) def backward(self, y_): y = y_.T Km1 = y.shape[0] - k = tt.arange(Km1)[(slice(None), ) + (None, ) * (y.ndim - 1)] - eq_share = logit(1. / (Km1 + 1 - k).astype(str(y_.dtype))) + k = tt.arange(Km1)[(slice(None),) + (None,) * (y.ndim - 1)] + eq_share = logit(1.0 / (Km1 + 1 - k).astype(str(y_.dtype))) z = invlogit(y + eq_share, self.eps) yl = tt.concatenate([z, tt.ones(y[:1].shape)]) yu = tt.concatenate([tt.ones(y[:1].shape), 1 - z]) @@ -403,12 +420,15 @@ def backward(self, y_): def jacobian_det(self, y_): y = y_.T Km1 = y.shape[0] - k = tt.arange(Km1)[(slice(None), ) + (None, ) * (y.ndim - 1)] - eq_share = logit(1. / (Km1 + 1 - k).astype(str(y_.dtype))) + k = tt.arange(Km1)[(slice(None),) + (None,) * (y.ndim - 1)] + eq_share = logit(1.0 / (Km1 + 1 - k).astype(str(y_.dtype))) yl = y + eq_share yu = tt.concatenate([tt.ones(y[:1].shape), 1 - invlogit(yl, self.eps)]) S = tt.extra_ops.cumprod(yu, 0) - return tt.sum(tt.log(S[:-1]) - tt.log1p(tt.exp(yl)) - tt.log1p(tt.exp(-yl)), 0).T + return tt.sum( + tt.log(S[:-1]) - tt.log1p(tt.exp(yl)) - tt.log1p(tt.exp(-yl)), 0 + ).T + stick_breaking = StickBreaking() @@ -418,6 +438,7 @@ def jacobian_det(self, y_): class Circular(ElemwiseTransform): """Transforms a linear space into a circular one. """ + name = "circular" def backward(self, y): @@ -432,6 +453,7 @@ def forward_val(self, x, point=None): def jacobian_det(self, x): return tt.zeros(x.shape) + circular = Circular() @@ -458,7 +480,7 @@ def jacobian_det(self, y): class Chain(Transform): def __init__(self, transform_list): self.transform_list = transform_list - self.name = '+'.join([transf.name for transf in self.transform_list]) + self.name = "+".join([transf.name for transf in self.transform_list]) def forward(self, x): y = x @@ -488,7 +510,7 @@ def jacobian_det(self, y): y = transf.backward(y) ndim0 = min(ndim0, det_.ndim) # match the shape of the smallest jacobian_det - det = 0. + det = 0.0 for det_ in det_list: if det_.ndim > ndim0: det += det_.sum(axis=-1) diff --git a/pymc3/examples/GHME_2013.py b/pymc3/examples/GHME_2013.py index bb1e57c7e0..eb2f4439de 100644 --- a/pymc3/examples/GHME_2013.py +++ b/pymc3/examples/GHME_2013.py @@ -5,11 +5,11 @@ from pymc3 import HalfCauchy, Model, Normal, get_data, sample from pymc3.distributions.timeseries import GaussianRandomWalk -data = pd.read_csv(get_data('pancreatitis.csv')) -countries = ['CYP', 'DNK', 'ESP', 'FIN', 'GBR', 'ISL'] +data = pd.read_csv(get_data("pancreatitis.csv")) +countries = ["CYP", "DNK", "ESP", "FIN", "GBR", "ISL"] data = data[data.area.isin(countries)] -age = data['age'] = np.array(data.age_start + data.age_end) / 2 +age = data["age"] = np.array(data.age_start + data.age_end) / 2 rate = data.value = data.value * 1000 group, countries = pd.factorize(data.area, order=countries) @@ -20,7 +20,7 @@ plt.subplot(2, 3, i + 1) plt.title(country) d = data[data.area == country] - plt.plot(d.age, d.value, '.') + plt.plot(d.age, d.value, ".") plt.ylim(0, rate.max()) @@ -43,33 +43,33 @@ def interpolate(x0, y0, x, group): with Model() as model: - coeff_sd = HalfCauchy('coeff_sd', 5) + coeff_sd = HalfCauchy("coeff_sd", 5) - y = GaussianRandomWalk('y', sd=coeff_sd, shape=(nknots, ncountries)) + y = GaussianRandomWalk("y", sd=coeff_sd, shape=(nknots, ncountries)) p = interpolate(knots, y, age, group) - sd = HalfCauchy('sd', 5) + sd = HalfCauchy("sd", 5) - vals = Normal('vals', p, sd=sd, observed=rate) + vals = Normal("vals", p, sd=sd, observed=rate) def run(n=3000): if n == "short": n = 150 with model: - trace = sample(n, tune=int(n/2), init='advi+adapt_diag') + trace = sample(n, tune=int(n / 2), init="advi+adapt_diag") for i, country in enumerate(countries): plt.subplot(2, 3, i + 1) plt.title(country) d = data[data.area == country] - plt.plot(d.age, d.value, '.') - plt.plot(knots, trace[y][::5, :, i].T, color='r', alpha=.01) + plt.plot(d.age, d.value, ".") + plt.plot(knots, trace[y][::5, :, i].T, color="r", alpha=0.01) plt.ylim(0, rate.max()) -if __name__ == '__main__': +if __name__ == "__main__": run() diff --git a/pymc3/examples/LKJ_correlation.py b/pymc3/examples/LKJ_correlation.py index b2dc0bb1ff..9080a17e6e 100644 --- a/pymc3/examples/LKJ_correlation.py +++ b/pymc3/examples/LKJ_correlation.py @@ -14,32 +14,36 @@ stds = np.ones(4) / 2.0 # Correlation matrix of 4 variables: -corr_r = np.array([[1., 0.75, 0., 0.15], - [0.75, 1., -0.06, 0.19], - [0., -0.06, 1., -0.04], - [0.15, 0.19, -0.04, 1.]]) +corr_r = np.array( + [ + [1.0, 0.75, 0.0, 0.15], + [0.75, 1.0, -0.06, 0.19], + [0.0, -0.06, 1.0, -0.04], + [0.15, 0.19, -0.04, 1.0], + ] +) cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds))) dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs) with pm.Model() as model: - mu = pm.Normal('mu', mu=0, sd=1, shape=n_var) + mu = pm.Normal("mu", mu=0, sd=1, shape=n_var) # Note that we access the distribution for the standard # deviations, and do not create a new random variable. sd_dist = pm.HalfCauchy.dist(beta=2.5) - packed_chol = pm.LKJCholeskyCov('chol_cov', n=n_var, eta=1, sd_dist=sd_dist) + packed_chol = pm.LKJCholeskyCov("chol_cov", n=n_var, eta=1, sd_dist=sd_dist) # compute the covariance matrix chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True) cov = tt.dot(chol, chol.T) # Extract the standard deviations etc - sd = pm.Deterministic('sd', tt.sqrt(tt.diag(cov))) - corr = tt.diag(sd**-1).dot(cov.dot(tt.diag(sd**-1))) - r = pm.Deterministic('r', corr[np.triu_indices(n_var, k=1)]) + sd = pm.Deterministic("sd", tt.sqrt(tt.diag(cov))) + corr = tt.diag(sd ** -1).dot(cov.dot(tt.diag(sd ** -1))) + r = pm.Deterministic("r", corr[np.triu_indices(n_var, k=1)]) - like = pm.MvNormal('likelihood', mu=mu, chol=chol, observed=dataset) + like = pm.MvNormal("likelihood", mu=mu, chol=chol, observed=dataset) def run(n=1000): @@ -47,8 +51,12 @@ def run(n=1000): n = 50 with model: trace = pm.sample(n) - pm.traceplot(trace, varnames=['mu', 'r'], - lines={'mu': mu_r, 'r': corr_r[np.triu_indices(n_var, k=1)]}) + pm.traceplot( + trace, + varnames=["mu", "r"], + lines={"mu": mu_r, "r": corr_r[np.triu_indices(n_var, k=1)]}, + ) -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/pymc3/examples/arbitrary_stochastic.py b/pymc3/examples/arbitrary_stochastic.py index 2d6e4fe1f2..1b747e3b7d 100644 --- a/pymc3/examples/arbitrary_stochastic.py +++ b/pymc3/examples/arbitrary_stochastic.py @@ -10,15 +10,15 @@ def logp(failure, lam, value): def build_model(): # data - failure = np.array([0., 1.]) - value = np.array([1., 0.]) + failure = np.array([0.0, 1.0]) + value = np.array([1.0, 0.0]) # model with pm.Model() as model: - lam = pm.Exponential('lam', 1.) - pm.DensityDist('x', logp, observed={'failure': failure, - 'lam': lam, - 'value': value}) + lam = pm.Exponential("lam", 1.0) + pm.DensityDist( + "x", logp, observed={"failure": failure, "lam": lam, "value": value} + ) return model @@ -28,5 +28,6 @@ def run(n_samples=3000): trace = pm.sample(n_samples) return trace + if __name__ == "__main__": run() diff --git a/pymc3/examples/arma_example.py b/pymc3/examples/arma_example.py index 55889bbf97..2f8300c3b8 100644 --- a/pymc3/examples/arma_example.py +++ b/pymc3/examples/arma_example.py @@ -2,6 +2,7 @@ from theano import scan, shared import numpy as np + """ ARMA example It is interesting to note just how much more compact this is than the original STAN example @@ -53,10 +54,10 @@ def build_model(): y = shared(np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float32)) with pm.Model() as arma_model: - sigma = pm.HalfNormal('sigma', 5.) - theta = pm.Normal('theta', 0., sd=1.) - phi = pm.Normal('phi', 0., sd=2.) - mu = pm.Normal('mu', 0., sd=10.) + sigma = pm.HalfNormal("sigma", 5.0) + theta = pm.Normal("theta", 0.0, sd=1.0) + phi = pm.Normal("phi", 0.0, sd=2.0) + mu = pm.Normal("mu", 0.0, sd=10.0) err0 = y[0] - (mu + phi * mu) @@ -64,25 +65,27 @@ def calc_next(last_y, this_y, err, mu, phi, theta): nu_t = mu + phi * last_y + theta * err return this_y - nu_t - err, _ = scan(fn=calc_next, - sequences=dict(input=y, taps=[-1, 0]), - outputs_info=[err0], - non_sequences=[mu, phi, theta]) + err, _ = scan( + fn=calc_next, + sequences=dict(input=y, taps=[-1, 0]), + outputs_info=[err0], + non_sequences=[mu, phi, theta], + ) - pm.Potential('like', pm.Normal.dist(0, sd=sigma).logp(err)) + pm.Potential("like", pm.Normal.dist(0, sd=sigma).logp(err)) return arma_model def run(n_samples=1000): model = build_model() with model: - trace = pm.sample(draws=n_samples, - tune=1000, - nuts_kwargs=dict(target_accept=.99)) + trace = pm.sample( + draws=n_samples, tune=1000, nuts_kwargs=dict(target_accept=0.99) + ) pm.plots.traceplot(trace) pm.plots.forestplot(trace) -if __name__ == '__main__': +if __name__ == "__main__": run() diff --git a/pymc3/examples/baseball.py b/pymc3/examples/baseball.py index 89eefa9ce8..03c159851e 100644 --- a/pymc3/examples/baseball.py +++ b/pymc3/examples/baseball.py @@ -6,31 +6,38 @@ import pymc3 as pm import numpy as np + def build_model(): - data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t", - skiprows=1, usecols=(2,3)) - - atbats = pm.floatX(data[:,0]) - hits = pm.floatX(data[:,1]) - + data = np.loadtxt( + pm.get_data("efron-morris-75-data.tsv"), + delimiter="\t", + skiprows=1, + usecols=(2, 3), + ) + + atbats = pm.floatX(data[:, 0]) + hits = pm.floatX(data[:, 1]) + N = len(hits) - + # we want to bound the kappa below BoundedKappa = pm.Bound(pm.Pareto, lower=1.0) - + with pm.Model() as model: - phi = pm.Uniform('phi', lower=0.0, upper=1.0) - kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5) - thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N) - ys = pm.Binomial('ys', n=atbats, p=thetas, observed=hits) + phi = pm.Uniform("phi", lower=0.0, upper=1.0) + kappa = BoundedKappa("kappa", alpha=1.0001, m=1.5) + thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N) + ys = pm.Binomial("ys", n=atbats, p=thetas, observed=hits) return model + def run(n=2000): model = build_model() with model: - trace = pm.sample(n, nuts_kwargs={'target_accept':.99}) + trace = pm.sample(n, nuts_kwargs={"target_accept": 0.99}) pm.traceplot(trace) -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/pymc3/examples/custom_dists.py b/pymc3/examples/custom_dists.py index a535fd4fff..4aaa4080fb 100644 --- a/pymc3/examples/custom_dists.py +++ b/pymc3/examples/custom_dists.py @@ -22,23 +22,25 @@ # add scatter to points xdata = np.random.normal(xdata, 10) ydata = np.random.normal(ydata, 10) -data = {'x': xdata, 'y': ydata} +data = {"x": xdata, "y": ydata} # define loglikelihood outside of the model context, otherwise cores wont work: # Lambdas defined in local namespace are not picklable (see issue #1995) def loglike1(value): - return -1.5 * tt.log(1 + value**2) + return -1.5 * tt.log(1 + value ** 2) + + def loglike2(value): return -tt.log(tt.abs_(value)) + with pm.Model() as model: - alpha = pm.Normal('intercept', mu=0, sd=100) + alpha = pm.Normal("intercept", mu=0, sd=100) # Create custom densities - beta = pm.DensityDist('slope', loglike1, testval=0) - sigma = pm.DensityDist('sigma', loglike2, testval=1) + beta = pm.DensityDist("slope", loglike1, testval=0) + sigma = pm.DensityDist("sigma", loglike2, testval=1) # Create likelihood - like = pm.Normal('y_est', mu=alpha + beta * - xdata, sd=sigma, observed=ydata) + like = pm.Normal("y_est", mu=alpha + beta * xdata, sd=sigma, observed=ydata) trace = pm.sample(2000, cores=2) @@ -47,10 +49,11 @@ def loglike2(value): # Create some convenience routines for plotting # All functions below written by Jake Vanderplas + def compute_sigma_level(trace1, trace2, nbins=20): """From a set of traces, bin by number of standard deviations""" L, xbins, ybins = np.histogram2d(trace1, trace2, nbins) - L[L == 0] = 1E-16 + L[L == 0] = 1e-16 shape = L.shape L = L.ravel() @@ -73,14 +76,14 @@ def plot_MCMC_trace(ax, xdata, ydata, trace, scatter=False, **kwargs): xbins, ybins, sigma = compute_sigma_level(trace[0], trace[1]) ax.contour(xbins, ybins, sigma.T, levels=[0.683, 0.955], **kwargs) if scatter: - ax.plot(trace[0], trace[1], ',k', alpha=0.1) - ax.set_xlabel(r'$\alpha$') - ax.set_ylabel(r'$\beta$') + ax.plot(trace[0], trace[1], ",k", alpha=0.1) + ax.set_xlabel(r"$\alpha$") + ax.set_ylabel(r"$\beta$") def plot_MCMC_model(ax, xdata, ydata, trace): """Plot the linear model and 2sigma contours""" - ax.plot(xdata, ydata, 'ok') + ax.plot(xdata, ydata, "ok") alpha, beta = trace[:2] xfit = np.linspace(-20, 120, 10) @@ -88,22 +91,21 @@ def plot_MCMC_model(ax, xdata, ydata, trace): mu = yfit.mean(0) sig = 2 * yfit.std(0) - ax.plot(xfit, mu, '-k') - ax.fill_between(xfit, mu - sig, mu + sig, color='lightgray') + ax.plot(xfit, mu, "-k") + ax.fill_between(xfit, mu - sig, mu + sig, color="lightgray") - ax.set_xlabel('x') - ax.set_ylabel('y') + ax.set_xlabel("x") + ax.set_ylabel("y") -def plot_MCMC_results(xdata, ydata, trace, colors='k'): +def plot_MCMC_results(xdata, ydata, trace, colors="k"): """Plot both the trace and the model together""" _, ax = plt.subplots(1, 2, figsize=(10, 4)) plot_MCMC_trace(ax[0], xdata, ydata, trace, True, colors=colors) plot_MCMC_model(ax[1], xdata, ydata, trace) -pymc3_trace = [trace['intercept'], - trace['slope'], - trace['sigma']] + +pymc3_trace = [trace["intercept"], trace["slope"], trace["sigma"]] plot_MCMC_results(xdata, ydata, pymc3_trace) plt.show() diff --git a/pymc3/examples/disaster_model.py b/pymc3/examples/disaster_model.py index 45c9ec0120..81347dd039 100644 --- a/pymc3/examples/disaster_model.py +++ b/pymc3/examples/disaster_model.py @@ -14,34 +14,148 @@ from numpy import arange, array -__all__ = ['disasters_data', 'switchpoint', 'early_mean', 'late_mean', 'rate', - 'disasters'] +__all__ = [ + "disasters_data", + "switchpoint", + "early_mean", + "late_mean", + "rate", + "disasters", +] # Time series of recorded coal mining disasters in the UK from 1851 to 1962 -disasters_data = array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, - 3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5, - 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, - 1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, - 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2, - 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]) +disasters_data = array( + [ + 4, + 5, + 4, + 0, + 1, + 4, + 3, + 4, + 0, + 6, + 3, + 3, + 4, + 0, + 2, + 6, + 3, + 3, + 5, + 4, + 5, + 3, + 1, + 4, + 4, + 1, + 5, + 5, + 3, + 4, + 2, + 5, + 2, + 2, + 3, + 4, + 2, + 1, + 3, + 2, + 2, + 1, + 1, + 1, + 1, + 3, + 0, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 3, + 1, + 0, + 3, + 2, + 2, + 0, + 1, + 1, + 1, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 2, + 1, + 0, + 0, + 0, + 1, + 1, + 0, + 2, + 3, + 3, + 1, + 1, + 2, + 1, + 1, + 1, + 1, + 2, + 4, + 2, + 0, + 0, + 1, + 4, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + ] +) year = arange(1851, 1962) with pm.Model() as model: - switchpoint = pm.DiscreteUniform('switchpoint', lower=year.min(), upper=year.max()) - early_mean = pm.Exponential('early_mean', lam=1.) - late_mean = pm.Exponential('late_mean', lam=1.) + switchpoint = pm.DiscreteUniform("switchpoint", lower=year.min(), upper=year.max()) + early_mean = pm.Exponential("early_mean", lam=1.0) + late_mean = pm.Exponential("late_mean", lam=1.0) # Allocate appropriate Poisson rates to years before and after current # switchpoint location rate = tt.switch(switchpoint >= year, early_mean, late_mean) - - disasters = pm.Poisson('disasters', rate, observed=disasters_data) + + disasters = pm.Poisson("disasters", rate, observed=disasters_data) # Initial values for stochastic nodes - start = {'early_mean': 2., 'late_mean': 3.} - + start = {"early_mean": 2.0, "late_mean": 3.0} + tr = pm.sample(1000, tune=500, start=start) pm.traceplot(tr) diff --git a/pymc3/examples/disaster_model_theano_op.py b/pymc3/examples/disaster_model_theano_op.py index de11b57079..ae3115a678 100644 --- a/pymc3/examples/disaster_model_theano_op.py +++ b/pymc3/examples/disaster_model_theano_op.py @@ -10,17 +10,131 @@ import theano.tensor as tt from numpy import arange, array, empty -__all__ = ['disasters_data', 'switchpoint', 'early_mean', 'late_mean', 'rate', - 'disasters'] +__all__ = [ + "disasters_data", + "switchpoint", + "early_mean", + "late_mean", + "rate", + "disasters", +] # Time series of recorded coal mining disasters in the UK from 1851 to 1962 -disasters_data = array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, - 3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5, - 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, - 1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, - 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2, - 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]) +disasters_data = array( + [ + 4, + 5, + 4, + 0, + 1, + 4, + 3, + 4, + 0, + 6, + 3, + 3, + 4, + 0, + 2, + 6, + 3, + 3, + 5, + 4, + 5, + 3, + 1, + 4, + 4, + 1, + 5, + 5, + 3, + 4, + 2, + 5, + 2, + 2, + 3, + 4, + 2, + 1, + 3, + 2, + 2, + 1, + 1, + 1, + 1, + 3, + 0, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 3, + 1, + 0, + 3, + 2, + 2, + 0, + 1, + 1, + 1, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 2, + 1, + 0, + 0, + 0, + 1, + 1, + 0, + 2, + 3, + 3, + 1, + 1, + 2, + 1, + 1, + 1, + 1, + 2, + 4, + 2, + 0, + 0, + 1, + 4, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + ] +) years = len(disasters_data) @@ -35,10 +149,10 @@ def rate_(switchpoint, early_mean, late_mean): with pm.Model() as model: # Prior for distribution of switchpoint location - switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years) + switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years) # Priors for pre- and post-switch mean number of disasters - early_mean = pm.Exponential('early_mean', lam=1.) - late_mean = pm.Exponential('late_mean', lam=1.) + early_mean = pm.Exponential("early_mean", lam=1.0) + late_mean = pm.Exponential("late_mean", lam=1.0) # Allocate appropriate Poisson rates to years before and after current # switchpoint location @@ -46,7 +160,7 @@ def rate_(switchpoint, early_mean, late_mean): rate = rate_(switchpoint, early_mean, late_mean) # Data likelihood - disasters = pm.Poisson('disasters', rate, observed=disasters_data) + disasters = pm.Poisson("disasters", rate, observed=disasters_data) # Use slice sampler for means step1 = pm.Slice([early_mean, late_mean]) @@ -54,7 +168,7 @@ def rate_(switchpoint, early_mean, late_mean): step2 = pm.Metropolis([switchpoint]) # Initial values for stochastic nodes - start = {'early_mean': 2., 'late_mean': 3.} + start = {"early_mean": 2.0, "late_mean": 3.0} tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], cores=2) pm.traceplot(tr) diff --git a/pymc3/examples/factor_potential.py b/pymc3/examples/factor_potential.py index ccb1c9251c..078c0eb27f 100644 --- a/pymc3/examples/factor_potential.py +++ b/pymc3/examples/factor_potential.py @@ -7,12 +7,14 @@ STAN. """ + def build_model(): with pm.Model() as model: - x = pm.Normal('x', 1, 1) - x2 = pm.Potential('x2', -x ** 2) + x = pm.Normal("x", 1, 1) + x2 = pm.Potential("x2", -x ** 2) return model + def run(n=1000): model = build_model() if n == "short": @@ -20,5 +22,6 @@ def run(n=1000): with model: pm.sample(n) -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/pymc3/examples/garch_example.py b/pymc3/examples/garch_example.py index d530e76011..cd8516f5d1 100644 --- a/pymc3/examples/garch_example.py +++ b/pymc3/examples/garch_example.py @@ -38,12 +38,11 @@ def get_garch_model(): shape = r.shape with Model() as garch: - alpha1 = Uniform('alpha1', 0., 1., shape=shape) - beta1 = Uniform('beta1', 0., 1 - alpha1, shape=shape) - mu = Normal('mu', mu=0., sd=100., shape=shape) - theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) + - beta1 * tt.pow(sigma1, 2)) - Normal('obs', mu, sd=theta, observed=r) + alpha1 = Uniform("alpha1", 0.0, 1.0, shape=shape) + beta1 = Uniform("beta1", 0.0, 1 - alpha1, shape=shape) + mu = Normal("mu", mu=0.0, sd=100.0, shape=shape) + theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) + beta1 * tt.pow(sigma1, 2)) + Normal("obs", mu, sd=theta, observed=r) return garch @@ -55,5 +54,5 @@ def run(n=1000): return tr -if __name__ == '__main__': +if __name__ == "__main__": summary(run()) diff --git a/pymc3/examples/gelman_bioassay.py b/pymc3/examples/gelman_bioassay.py index 69d7ca1118..2de90d146c 100644 --- a/pymc3/examples/gelman_bioassay.py +++ b/pymc3/examples/gelman_bioassay.py @@ -4,19 +4,19 @@ # Samples for each dose level n = 5 * ones(4, dtype=int) # Log-dose -dose = array([-.86, -.3, -.05, .73]) +dose = array([-0.86, -0.3, -0.05, 0.73]) with pm.Model() as model: # Logit-linear model parameters - alpha = pm.Normal('alpha', 0, sd=100.) - beta = pm.Normal('beta', 0, sd=1.) + alpha = pm.Normal("alpha", 0, sd=100.0) + beta = pm.Normal("beta", 0, sd=1.0) # Calculate probabilities of death - theta = pm.Deterministic('theta', pm.math.invlogit(alpha + beta * dose)) + theta = pm.Deterministic("theta", pm.math.invlogit(alpha + beta * dose)) # Data likelihood - deaths = pm.Binomial('deaths', n=n, p=theta, observed=[0, 1, 3, 5]) + deaths = pm.Binomial("deaths", n=n, p=theta, observed=[0, 1, 3, 5]) def run(n=1000): @@ -25,5 +25,6 @@ def run(n=1000): with model: pm.sample(n, tune=1000) -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/pymc3/examples/gelman_schools.py b/pymc3/examples/gelman_schools.py index 0331f81682..4ba7c10136 100644 --- a/pymc3/examples/gelman_schools.py +++ b/pymc3/examples/gelman_schools.py @@ -1,7 +1,7 @@ from pymc3 import HalfCauchy, Normal, sample, Model, loo import numpy as np -'''Original Stan model +"""Original Stan model data { int J; // number of schools @@ -22,21 +22,21 @@ eta ~ normal(0, 1); y ~ normal(theta, sigma); } -''' +""" J = 8 -y = np.array([28, 8, -3, 7, -1, 1, 18, 12]) -sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18]) +y = np.array([28, 8, -3, 7, -1, 1, 18, 12]) +sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18]) with Model() as schools: - eta = Normal('eta', 0, 1, shape=J) - mu = Normal('mu', 0, sd=1e6) - tau = HalfCauchy('tau', 25) + eta = Normal("eta", 0, 1, shape=J) + mu = Normal("mu", 0, sd=1e6) + tau = HalfCauchy("tau", 25) theta = mu + tau * eta - obs = Normal('obs', theta, sd=sigma, observed=y) + obs = Normal("obs", theta, sd=sigma, observed=y) def run(n=1000): @@ -46,5 +46,6 @@ def run(n=1000): tr = sample(n) loo(tr) -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/pymc3/examples/lasso_missing.py b/pymc3/examples/lasso_missing.py index 3bf42ede9f..3929a34930 100644 --- a/pymc3/examples/lasso_missing.py +++ b/pymc3/examples/lasso_missing.py @@ -3,53 +3,72 @@ from numpy.ma import masked_values # Import data, filling missing values with sentinels (-999) -test_scores = pd.read_csv(pm.get_data('test_scores.csv')).fillna(-999) +test_scores = pd.read_csv(pm.get_data("test_scores.csv")).fillna(-999) # Extract variables: test score, gender, number of siblings, previous disability, age, # mother with HS education or better, hearing loss identified by 3 months # of age -(score, male, siblings, disability, - age, mother_hs, early_ident) = test_scores[['score', 'male', 'siblings', - 'prev_disab', 'age_test', - 'mother_hs', 'early_ident']].astype(float).values.T +(score, male, siblings, disability, age, mother_hs, early_ident) = ( + test_scores[ + [ + "score", + "male", + "siblings", + "prev_disab", + "age_test", + "mother_hs", + "early_ident", + ] + ] + .astype(float) + .values.T +) with pm.Model() as model: # Impute missing values - sib_mean = pm.Exponential('sib_mean', 1.) - siblings_imp = pm.Poisson('siblings_imp', sib_mean, - observed=siblings) + sib_mean = pm.Exponential("sib_mean", 1.0) + siblings_imp = pm.Poisson("siblings_imp", sib_mean, observed=siblings) - p_disab = pm.Beta('p_disab', 1., 1.) + p_disab = pm.Beta("p_disab", 1.0, 1.0) disability_imp = pm.Bernoulli( - 'disability_imp', p_disab, observed=masked_values(disability, value=-999)) + "disability_imp", p_disab, observed=masked_values(disability, value=-999) + ) - p_mother = pm.Beta('p_mother', 1., 1.) - mother_imp = pm.Bernoulli('mother_imp', p_mother, - observed=masked_values(mother_hs, value=-999)) + p_mother = pm.Beta("p_mother", 1.0, 1.0) + mother_imp = pm.Bernoulli( + "mother_imp", p_mother, observed=masked_values(mother_hs, value=-999) + ) - s = pm.HalfCauchy('s', 5., testval=5) - beta = pm.Laplace('beta', 0., 100., shape=7, testval=.1) + s = pm.HalfCauchy("s", 5.0, testval=5) + beta = pm.Laplace("beta", 0.0, 100.0, shape=7, testval=0.1) - expected_score = (beta[0] + beta[1] * male + beta[2] * siblings_imp + beta[3] * disability_imp + - beta[4] * age + beta[5] * mother_imp + beta[6] * early_ident) + expected_score = ( + beta[0] + + beta[1] * male + + beta[2] * siblings_imp + + beta[3] * disability_imp + + beta[4] * age + + beta[5] * mother_imp + + beta[6] * early_ident + ) - observed_score = pm.Normal( - 'observed_score', expected_score, s, observed=score) + observed_score = pm.Normal("observed_score", expected_score, s, observed=score) with model: start = pm.find_MAP() step1 = pm.NUTS([beta, s, p_disab, p_mother, sib_mean], scaling=start) - step2 = pm.BinaryGibbsMetropolis([mother_imp.missing_values, - disability_imp.missing_values]) + step2 = pm.BinaryGibbsMetropolis( + [mother_imp.missing_values, disability_imp.missing_values] + ) def run(n=5000): - if n == 'short': + if n == "short": n = 100 with model: pm.sample(n, step=[step1, step2], start=start) -if __name__ == '__main__': +if __name__ == "__main__": run() diff --git a/pymc3/examples/lightspeed_example.py b/pymc3/examples/lightspeed_example.py index 23c09f728b..d17a2acc99 100644 --- a/pymc3/examples/lightspeed_example.py +++ b/pymc3/examples/lightspeed_example.py @@ -1,11 +1,76 @@ import numpy as np import pymc3 as pm -light_speed = np.array([28, 26, 33, 24, 34, -44, 27, 16, 40, -2, 29, 22, 24, 21, 25, - 30, 23, 29, 31, 19, 24, 20, 36, 32, 36, 28, 25, 21, 28, 29, - 37, 25, 28, 26, 30, 32, 36, 26, 30, 22, 36, 23, 27, 27, 28, - 27, 31, 27, 26, 33, 26, 32, 32, 24, 39, 28, 24, 25, 32, 25, - 29, 27, 28, 29, 16, 23]) +light_speed = np.array( + [ + 28, + 26, + 33, + 24, + 34, + -44, + 27, + 16, + 40, + -2, + 29, + 22, + 24, + 21, + 25, + 30, + 23, + 29, + 31, + 19, + 24, + 20, + 36, + 32, + 36, + 28, + 25, + 21, + 28, + 29, + 37, + 25, + 28, + 26, + 30, + 32, + 36, + 26, + 30, + 22, + 36, + 23, + 27, + 27, + 28, + 27, + 31, + 27, + 26, + 33, + 26, + 32, + 32, + 24, + 39, + 28, + 24, + 25, + 32, + 25, + 29, + 27, + 28, + 29, + 16, + 23, + ] +) model_1 = pm.Model() @@ -15,13 +80,15 @@ # sigma = pm.Uniform('sigma', lower = 0, upper= np.inf) # using vague priors works - mu = pm.Uniform('mu', lower=light_speed.std() / 1000.0, - upper=light_speed.std() * 1000.0) - sigma = pm.Uniform('sigma', lower=light_speed.std() / - 1000.0, upper=light_speed.std() * 1000.0) + mu = pm.Uniform( + "mu", lower=light_speed.std() / 1000.0, upper=light_speed.std() * 1000.0 + ) + sigma = pm.Uniform( + "sigma", lower=light_speed.std() / 1000.0, upper=light_speed.std() * 1000.0 + ) # define likelihood - y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=light_speed) + y_obs = pm.Normal("Y_obs", mu=mu, sd=sigma, observed=light_speed) def run(n=5000): @@ -31,5 +98,5 @@ def run(n=5000): pm.summary(trace) -if __name__ == '__main__': +if __name__ == "__main__": run() diff --git a/pymc3/examples/rankdata_ordered.py b/pymc3/examples/rankdata_ordered.py index b7fd0000b4..8edea3eac6 100644 --- a/pymc3/examples/rankdata_ordered.py +++ b/pymc3/examples/rankdata_ordered.py @@ -22,39 +22,41 @@ with pm.Model() as m: - mu_hat = pm.Normal('mu_hat', 0, 1, shape=K-1) + mu_hat = pm.Normal("mu_hat", 0, 1, shape=K - 1) # set first value to 0 to avoid unidentified model - mu = tt.concatenate([[0.], mu_hat]) + mu = tt.concatenate([[0.0], mu_hat]) # sd = pm.HalfCauchy('sigma', 1.) - latent = pm.Normal('latent', - mu=mu[y_argsort], - sd=1., # using sd does not work yet - transform=pm.distributions.transforms.ordered, - shape=y_argsort.shape, - testval=np.repeat(np.arange(K)[:,None], J, axis=1).T) - # There are some problems using Ordered - # right now, you need to specify testval + latent = pm.Normal( + "latent", + mu=mu[y_argsort], + sd=1.0, # using sd does not work yet + transform=pm.distributions.transforms.ordered, + shape=y_argsort.shape, + testval=np.repeat(np.arange(K)[:, None], J, axis=1).T, + ) + # There are some problems using Ordered + # right now, you need to specify testval def run(n=1500): - if n == 'short': + if n == "short": n = 50 with m: trace = pm.sample(n) - pm.traceplot(trace, varnames=['mu_hat']) + pm.traceplot(trace, varnames=["mu_hat"]) - print('Example observed data: ') + print("Example observed data: ") print(y[:30, :].T) - print('The true ranking is: ') + print("The true ranking is: ") print(yreal.flatten()) - print('The Latent mean is: ') - latentmu = np.hstack(([0], pm.summary(trace, varnames=['mu_hat'])['mean'].values)) + print("The Latent mean is: ") + latentmu = np.hstack(([0], pm.summary(trace, varnames=["mu_hat"])["mean"].values)) print(np.round(latentmu, 2)) - print('The estimated ranking is: ') + print("The estimated ranking is: ") print(np.argsort(latentmu)) -if __name__ == '__main__': +if __name__ == "__main__": run() diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py index 14b87d7958..89f87200c8 100644 --- a/pymc3/examples/samplers_mvnormal.py +++ b/pymc3/examples/samplers_mvnormal.py @@ -19,6 +19,7 @@ # different behaviour with respect to blocking. USE_XY = True + def run(steppers, p): steppers = set(steppers) traces = {} @@ -27,52 +28,46 @@ def run(steppers, p): with pm.Model() as model: if USE_XY: - x = pm.Flat('x') - y = pm.Flat('y') - mu = np.array([0.,0.]) - cov = np.array([[1.,p],[p,1.]]) - z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x,y])) - pot = pm.Potential('logp_xy', z) - start = {'x': 0, 'y': 0} + x = pm.Flat("x") + y = pm.Flat("y") + mu = np.array([0.0, 0.0]) + cov = np.array([[1.0, p], [p, 1.0]]) + z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x, y])) + pot = pm.Potential("logp_xy", z) + start = {"x": 0, "y": 0} else: - mu = np.array([0.,0.]) - cov = np.array([[1.,p],[p,1.]]) - z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,)) - start={'z': [0, 0]} + mu = np.array([0.0, 0.0]) + cov = np.array([[1.0, p], [p, 1.0]]) + z = pm.MvNormal("z", mu=mu, cov=cov, shape=(2,)) + start = {"z": [0, 0]} for step_cls in steppers: name = step_cls.__name__ t_start = time.time() mt = pm.sample( - draws=10000, - chains=16, parallelize=False, - step=step_cls(), - start=start + draws=10000, chains=16, parallelize=False, step=step_cls(), start=start ) runtimes[name] = time.time() - t_start - print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains)) + print( + "{} samples across {} chains".format(len(mt) * mt.nchains, mt.nchains) + ) traces[name] = mt en = pm.diagnostics.effective_n(mt) - print('effective: {}\r\n'.format(en)) + print("effective: {}\r\n".format(en)) if USE_XY: - effn[name] = np.mean(en['x']) / len(mt) / mt.nchains + effn[name] = np.mean(en["x"]) / len(mt) / mt.nchains else: - effn[name] = np.mean(en['z']) / len(mt) / mt.nchains + effn[name] = np.mean(en["z"]) / len(mt) / mt.nchains return traces, effn, runtimes -if __name__ == '__main__': - methods = [ - pm.Metropolis, - pm.Slice, - pm.NUTS, - pm.DEMetropolis - ] +if __name__ == "__main__": + methods = [pm.Metropolis, pm.Slice, pm.NUTS, pm.DEMetropolis] names = [c.__name__ for c in methods] - df_base = pd.DataFrame(columns=['p'] + names) - df_base['p'] = [.0,.9] - df_base = df_base.set_index('p') + df_base = pd.DataFrame(columns=["p"] + names) + df_base["p"] = [0.0, 0.9] + df_base = df_base.set_index("p") df_effectiven = df_base.copy() df_runtime = df_base.copy() @@ -85,16 +80,16 @@ def run(steppers, p): df_runtime.set_value(p, name, runtime[name]) df_performance.set_value(p, name, rate[name] / runtime[name]) - print('\r\nEffective sample size [0...1]') - print(df_effectiven.T.to_string(float_format='{:.3f}'.format)) + print("\r\nEffective sample size [0...1]") + print(df_effectiven.T.to_string(float_format="{:.3f}".format)) - print('\r\nRuntime [s]') - print(df_runtime.T.to_string(float_format='{:.1f}'.format)) + print("\r\nRuntime [s]") + print(df_runtime.T.to_string(float_format="{:.1f}".format)) - if 'NUTS' in names: - print('\r\nNormalized effective sampling rate [0...1]') - df_performance = df_performance.T / df_performance.loc[0]['NUTS'] + if "NUTS" in names: + print("\r\nNormalized effective sampling rate [0...1]") + df_performance = df_performance.T / df_performance.loc[0]["NUTS"] else: - print('\r\nNormalized effective sampling rate [1/s]') + print("\r\nNormalized effective sampling rate [1/s]") df_performance = df_performance.T - print(df_performance.to_string(float_format='{:.3f}'.format)) + print(df_performance.to_string(float_format="{:.3f}".format)) diff --git a/pymc3/examples/simpletest.py b/pymc3/examples/simpletest.py index d67f63176b..2f83391bb1 100644 --- a/pymc3/examples/simpletest.py +++ b/pymc3/examples/simpletest.py @@ -3,15 +3,15 @@ # import pydevd # pydevd.set_pm_excepthook() -np.seterr(invalid='raise') +np.seterr(invalid="raise") data = np.random.normal(size=(2, 20)) with pm.Model() as model: - x = pm.Normal('x', mu=.5, sd=2., shape=(2, 1)) - z = pm.Beta('z', alpha=10, beta=5.5) - d = pm.Normal('data', mu=x, sd=.75, observed=data) + x = pm.Normal("x", mu=0.5, sd=2.0, shape=(2, 1)) + z = pm.Beta("z", alpha=10, beta=5.5) + d = pm.Normal("data", mu=x, sd=0.75, observed=data) def run(n=1000): @@ -19,7 +19,8 @@ def run(n=1000): n = 50 with model: trace = pm.sample(n) - pm.traceplot(trace, varnames=['x']) + pm.traceplot(trace, varnames=["x"]) -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/pymc3/exceptions.py b/pymc3/exceptions.py index fd9afd76ea..7e58f34c06 100644 --- a/pymc3/exceptions.py +++ b/pymc3/exceptions.py @@ -1,4 +1,4 @@ -__all__ = ['SamplingError'] +__all__ = ["SamplingError"] class SamplingError(RuntimeError): diff --git a/pymc3/glm/families.py b/pymc3/glm/families.py index 4a15339993..2dfc0a0902 100644 --- a/pymc3/glm/families.py +++ b/pymc3/glm/families.py @@ -6,7 +6,7 @@ from ..model import modelcontext from .. import distributions as pm_dists -__all__ = ['Normal', 'StudentT', 'Binomial', 'Poisson', 'NegativeBinomial'] +__all__ = ["Normal", "StudentT", "Binomial", "Poisson", "NegativeBinomial"] # Define link functions @@ -14,11 +14,11 @@ # it as a method. -class Identity(): - +class Identity: def __call__(self, x): return x + identity = Identity() logit = tt.nnet.sigmoid inverse = tt.inv @@ -28,19 +28,20 @@ def __call__(self, x): class Family(object): """Base class for Family of likelihood distribution and link functions. """ + priors = {} link = None def __init__(self, **kwargs): # Overwrite defaults for key, val in kwargs.items(): - if key == 'priors': + if key == "priors": self.priors = copy(self.priors) self.priors.update(val) else: setattr(self, key, val) - def _get_priors(self, model=None, name=''): + def _get_priors(self, model=None, name=""): """Return prior distributions of the likelihood. Returns @@ -48,14 +49,14 @@ def _get_priors(self, model=None, name=''): dict : mapping name -> pymc3 distribution """ if name: - name = '{}_'.format(name) + name = "{}_".format(name) model = modelcontext(model) priors = {} for key, val in self.priors.items(): if isinstance(val, (numbers.Number, np.ndarray, np.generic)): priors[key] = val else: - priors[key] = model.Var('{}{}'.format(name, key), val) + priors[key] = model.Var("{}{}".format(name, key), val) return priors @@ -73,48 +74,55 @@ def create_likelihood(self, name, y_est, y_data, model=None): # Wrap y_est in link function priors[self.parent] = self.link(y_est) if name: - name = '{}_'.format(name) - return self.likelihood('{}y'.format(name), observed=y_data, **priors) + name = "{}_".format(name) + return self.likelihood("{}y".format(name), observed=y_data, **priors) def __repr__(self): return """Family {klass}: Likelihood : {likelihood}({parent}) Priors : {priors} - Link function: {link}.""".format(klass=self.__class__, likelihood=self.likelihood.__name__, parent=self.parent, priors=self.priors, link=self.link) + Link function: {link}.""".format( + klass=self.__class__, + likelihood=self.likelihood.__name__, + parent=self.parent, + priors=self.priors, + link=self.link, + ) class StudentT(Family): link = identity likelihood = pm_dists.StudentT - parent = 'mu' - priors = {'lam': pm_dists.HalfCauchy.dist(beta=10, testval=1.), - 'nu': 1} + parent = "mu" + priors = {"lam": pm_dists.HalfCauchy.dist(beta=10, testval=1.0), "nu": 1} class Normal(Family): link = identity likelihood = pm_dists.Normal - parent = 'mu' - priors = {'sd': pm_dists.HalfCauchy.dist(beta=10, testval=1.)} + parent = "mu" + priors = {"sd": pm_dists.HalfCauchy.dist(beta=10, testval=1.0)} class Binomial(Family): link = logit likelihood = pm_dists.Binomial - parent = 'p' - priors = {'n': 1} + parent = "p" + priors = {"n": 1} class Poisson(Family): link = exp likelihood = pm_dists.Poisson - parent = 'mu' - priors = {'mu': pm_dists.HalfCauchy.dist(beta=10, testval=1.)} + parent = "mu" + priors = {"mu": pm_dists.HalfCauchy.dist(beta=10, testval=1.0)} class NegativeBinomial(Family): link = exp likelihood = pm_dists.NegativeBinomial - parent = 'mu' - priors = {'mu': pm_dists.HalfCauchy.dist(beta=10, testval=1.), - 'alpha': pm_dists.HalfCauchy.dist(beta=10, testval=1.)} + parent = "mu" + priors = { + "mu": pm_dists.HalfCauchy.dist(beta=10, testval=1.0), + "alpha": pm_dists.HalfCauchy.dist(beta=10, testval=1.0), + } diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py index f795f1909c..dc0bd468f5 100644 --- a/pymc3/glm/linear.py +++ b/pymc3/glm/linear.py @@ -6,10 +6,7 @@ from .utils import any_to_tensor_and_labels -__all__ = [ - 'LinearComponent', - 'GLM' -] +__all__ = ["LinearComponent", "GLM"] class LinearComponent(Model): @@ -32,11 +29,22 @@ class LinearComponent(Model): this can be used to specify an a priori known component to be included in the linear predictor during fitting. """ - default_regressor_prior = Normal.dist(mu=0, tau=1.0E-6) + + default_regressor_prior = Normal.dist(mu=0, tau=1.0e-6) default_intercept_prior = Flat.dist() - def __init__(self, x, y, intercept=True, labels=None, - priors=None, vars=None, name='', model=None, offset=0.): + def __init__( + self, + x, + y, + intercept=True, + labels=None, + priors=None, + vars=None, + name="", + model=None, + offset=0.0, + ): super(LinearComponent, self).__init__(name, model) if priors is None: priors = {} @@ -45,23 +53,16 @@ def __init__(self, x, y, intercept=True, labels=None, x, labels = any_to_tensor_and_labels(x, labels) # now we have x, shape and labels if intercept: - x = tt.concatenate( - [tt.ones((x.shape[0], 1), x.dtype), x], - axis=1 - ) - labels = ['Intercept'] + labels + x = tt.concatenate([tt.ones((x.shape[0], 1), x.dtype), x], axis=1) + labels = ["Intercept"] + labels coeffs = list() for name in labels: - if name == 'Intercept': + if name == "Intercept": if name in vars: v = Deterministic(name, vars[name]) else: v = self.Var( - name=name, - dist=priors.get( - name, - self.default_intercept_prior - ) + name=name, dist=priors.get(name, self.default_intercept_prior) ) coeffs.append(v) else: @@ -71,26 +72,32 @@ def __init__(self, x, y, intercept=True, labels=None, v = self.Var( name=name, dist=priors.get( - name, - priors.get( - 'Regressor', - self.default_regressor_prior - ) - ) + name, priors.get("Regressor", self.default_regressor_prior) + ), ) coeffs.append(v) self.coeffs = tt.stack(coeffs, axis=0) self.y_est = x.dot(self.coeffs) + offset @classmethod - def from_formula(cls, formula, data, priors=None, vars=None, - name='', model=None, offset=0.): + def from_formula( + cls, formula, data, priors=None, vars=None, name="", model=None, offset=0.0 + ): import patsy + y, x = patsy.dmatrices(formula, data) labels = x.design_info.column_names - return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False, - labels=labels, priors=priors, vars=vars, name=name, - model=model, offset=offset) + return cls( + np.asarray(x), + np.asarray(y)[:, -1], + intercept=False, + labels=labels, + priors=priors, + vars=vars, + name=name, + model=model, + offset=offset, + ) class GLM(LinearComponent): @@ -115,13 +122,30 @@ class GLM(LinearComponent): this can be used to specify an a priori known component to be included in the linear predictor during fitting. """ - def __init__(self, x, y, intercept=True, labels=None, - priors=None, vars=None, family='normal', name='', - model=None, offset=0.): + + def __init__( + self, + x, + y, + intercept=True, + labels=None, + priors=None, + vars=None, + family="normal", + name="", + model=None, + offset=0.0, + ): super(GLM, self).__init__( - x, y, intercept=intercept, labels=labels, - priors=priors, vars=vars, name=name, - model=model, offset=offset + x, + y, + intercept=intercept, + labels=labels, + priors=priors, + vars=vars, + name=name, + model=model, + offset=offset, ) _families = dict( @@ -134,19 +158,37 @@ def __init__(self, x, y, intercept=True, labels=None, if isinstance(family, str): family = _families[family]() self.y_est = family.create_likelihood( - name='', y_est=self.y_est, - y_data=y, model=self) + name="", y_est=self.y_est, y_data=y, model=self + ) @classmethod - def from_formula(cls, formula, data, priors=None, - vars=None, family='normal', name='', - model=None, offset=0.): + def from_formula( + cls, + formula, + data, + priors=None, + vars=None, + family="normal", + name="", + model=None, + offset=0.0, + ): import patsy + y, x = patsy.dmatrices(formula, data) labels = x.design_info.column_names - return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False, - labels=labels, priors=priors, vars=vars, family=family, - name=name, model=model, offset=offset) + return cls( + np.asarray(x), + np.asarray(y)[:, -1], + intercept=False, + labels=labels, + priors=priors, + vars=vars, + family=family, + name=name, + model=model, + offset=offset, + ) glm = GLM diff --git a/pymc3/glm/utils.py b/pymc3/glm/utils.py index f2d8393f4e..638e5458ee 100644 --- a/pymc3/glm/utils.py +++ b/pymc3/glm/utils.py @@ -72,7 +72,7 @@ def any_to_tensor_and_labels(x, labels=None): elif not isinstance(x, tt.Variable): x = np.asarray(x) if x.ndim == 0: - raise ValueError('Cannot use scalars') + raise ValueError("Cannot use scalars") elif x.ndim == 1: x = x[:, None] # something really strange goes here, @@ -81,28 +81,26 @@ def any_to_tensor_and_labels(x, labels=None): elif labels is not None: x = tt.as_tensor_variable(x) if x.ndim == 0: - raise ValueError('Cannot use scalars') + raise ValueError("Cannot use scalars") elif x.ndim == 1: x = x[:, None] - else: # trust input + else: # trust input pass # we should check that we can extract labels if labels is None and not isinstance(x, tt.Variable): - labels = ['x%d' % i for i in range(x.shape[1])] + labels = ["x%d" % i for i in range(x.shape[1])] # for theano variables we should have labels from user elif labels is None: - raise ValueError('Please provide labels as ' - 'we cannot infer shape of input') - else: # trust labels, user knows what he is doing + raise ValueError("Please provide labels as " "we cannot infer shape of input") + else: # trust labels, user knows what he is doing pass # it's time to check shapes if we can if not isinstance(x, tt.Variable): if not len(labels) == x.shape[1]: raise ValueError( - 'Please provide full list ' - 'of labels for coefficients, ' - 'got len(labels)=%d instead of %d' - % (len(labels), x.shape[1]) + "Please provide full list " + "of labels for coefficients, " + "got len(labels)=%d instead of %d" % (len(labels), x.shape[1]) ) else: # trust labels, as we raised an @@ -110,7 +108,7 @@ def any_to_tensor_and_labels(x, labels=None): pass # convert labels to list if isinstance(labels, pd.RangeIndex): - labels = ['x%d' % i for i in labels] + labels = ["x%d" % i for i in labels] # maybe it was a tuple ot whatever elif not isinstance(labels, list): labels = list(labels) @@ -119,7 +117,7 @@ def any_to_tensor_and_labels(x, labels=None): x = tt.as_tensor_variable(x) # finally check dimensions if x.ndim == 0: - raise ValueError('Cannot use scalars') + raise ValueError("Cannot use scalars") elif x.ndim == 1: x = x[:, None] return x, labels diff --git a/pymc3/gp/cov.py b/pymc3/gp/cov.py index 5339548666..80ac95ce1b 100644 --- a/pymc3/gp/cov.py +++ b/pymc3/gp/cov.py @@ -3,26 +3,28 @@ from functools import reduce from operator import mul, add -__all__ = ['Constant', - 'WhiteNoise', - 'ExpQuad', - 'RatQuad', - 'Exponential', - 'Matern52', - 'Matern32', - 'Linear', - 'Polynomial', - 'Cosine', - 'Periodic', - 'WarpedInput', - 'Gibbs', - 'Coregion', - 'ScaledCov', - 'Kron'] +__all__ = [ + "Constant", + "WhiteNoise", + "ExpQuad", + "RatQuad", + "Exponential", + "Matern52", + "Matern32", + "Linear", + "Polynomial", + "Cosine", + "Periodic", + "WarpedInput", + "Gibbs", + "Coregion", + "ScaledCov", + "Kron", +] class Covariance(object): - R""" + r""" Base class for all kernels/covariance functions. Parameters @@ -43,7 +45,7 @@ def __init__(self, input_dim, active_dims=None): self.active_dims = np.asarray(active_dims, np.int) def __call__(self, X, Xs=None, diag=False): - R""" + r""" Evaluate the kernel/covariance function. Parameters @@ -103,8 +105,13 @@ def __array_wrap__(self, result): class Combination(Covariance): def __init__(self, factor_list): - input_dim = max([factor.input_dim for factor in factor_list - if isinstance(factor, Covariance)]) + input_dim = max( + [ + factor.input_dim + for factor in factor_list + if isinstance(factor, Covariance) + ] + ) super(Combination, self).__init__(input_dim=input_dim) self.factor_list = [] for factor in factor_list: @@ -124,9 +131,14 @@ def merge_factors(self, X, Xs=None, diag=False): factor_list.append(np.diag(factor)) else: factor_list.append(factor) - elif isinstance(factor, (tt.TensorConstant, - tt.TensorVariable, - tt.sharedvar.TensorSharedVariable)): + elif isinstance( + factor, + ( + tt.TensorConstant, + tt.TensorVariable, + tt.sharedvar.TensorSharedVariable, + ), + ): if factor.ndim == 2 and diag: factor_list.append(tt.diag(factor)) else: @@ -147,7 +159,7 @@ def __call__(self, X, Xs=None, diag=False): class Kron(Covariance): - R"""Form a covariance object that is the kronecker product of other covariances. + r"""Form a covariance object that is the kronecker product of other covariances. In contrast to standard multiplication, where each covariance is given the same inputs X and Xs, kronecker product covariances first split the inputs @@ -179,13 +191,14 @@ def _split(self, X, Xs): def __call__(self, X, Xs=None, diag=False): X_split, Xs_split = self._split(X, Xs) - covs = [cov(x, xs, diag) for cov, x, xs - in zip(self.factor_list, X_split, Xs_split)] + covs = [ + cov(x, xs, diag) for cov, x, xs in zip(self.factor_list, X_split, Xs_split) + ] return reduce(mul, covs) class Constant(Covariance): - R""" + r""" Constant valued covariance function. .. math:: @@ -208,7 +221,7 @@ def full(self, X, Xs=None): class WhiteNoise(Covariance): - R""" + r""" White noise covariance function. .. math:: @@ -231,7 +244,7 @@ def full(self, X, Xs=None): class Stationary(Covariance): - R""" + r""" Base class for stationary kernels/covariance functions. Parameters @@ -256,13 +269,15 @@ def square_dist(self, X, Xs): X = tt.mul(X, 1.0 / self.ls) X2 = tt.sum(tt.square(X), 1) if Xs is None: - sqd = (-2.0 * tt.dot(X, tt.transpose(X)) - + (tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1)))) + sqd = -2.0 * tt.dot(X, tt.transpose(X)) + ( + tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1)) + ) else: Xs = tt.mul(Xs, 1.0 / self.ls) Xs2 = tt.sum(tt.square(Xs), 1) - sqd = (-2.0 * tt.dot(X, tt.transpose(Xs)) - + (tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1)))) + sqd = -2.0 * tt.dot(X, tt.transpose(Xs)) + ( + tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1)) + ) return tt.clip(sqd, 0.0, np.inf) def euclidean_dist(self, X, Xs): @@ -277,7 +292,7 @@ def full(self, X, Xs=None): class Periodic(Stationary): - R""" + r""" The Periodic kernel. .. math:: @@ -287,19 +302,20 @@ class Periodic(Stationary): def __init__(self, input_dim, period, ls=None, ls_inv=None, active_dims=None): super(Periodic, self).__init__(input_dim, ls, ls_inv, active_dims) self.period = period + def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) if Xs is None: Xs = X - f1 = X.dimshuffle(0, 'x', 1) - f2 = Xs.dimshuffle('x', 0, 1) + f1 = X.dimshuffle(0, "x", 1) + f2 = Xs.dimshuffle("x", 0, 1) r = np.pi * (f1 - f2) / self.period r = tt.sum(tt.square(tt.sin(r) / self.ls), 2) return tt.exp(-0.5 * r) class ExpQuad(Stationary): - R""" + r""" The Exponentiated Quadratic kernel. Also refered to as the Squared Exponential, or Radial Basis Function kernel. @@ -314,7 +330,7 @@ def full(self, X, Xs=None): class RatQuad(Stationary): - R""" + r""" The Rational Quadratic kernel. .. math:: @@ -328,12 +344,14 @@ def __init__(self, input_dim, alpha, ls=None, ls_inv=None, active_dims=None): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) - return (tt.power((1.0 + 0.5 * self.square_dist(X, Xs) - * (1.0 / self.alpha)), -1.0 * self.alpha)) + return tt.power( + (1.0 + 0.5 * self.square_dist(X, Xs) * (1.0 / self.alpha)), + -1.0 * self.alpha, + ) class Matern52(Stationary): - R""" + r""" The Matern kernel with nu = 5/2. .. math:: @@ -346,12 +364,13 @@ class Matern52(Stationary): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) r = self.euclidean_dist(X, Xs) - return ((1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * tt.square(r)) - * tt.exp(-1.0 * np.sqrt(5.0) * r)) + return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * tt.square(r)) * tt.exp( + -1.0 * np.sqrt(5.0) * r + ) class Matern32(Stationary): - R""" + r""" The Matern kernel with nu = 3/2. .. math:: @@ -367,7 +386,7 @@ def full(self, X, Xs=None): class Exponential(Stationary): - R""" + r""" The Exponential kernel. .. math:: @@ -381,7 +400,7 @@ def full(self, X, Xs=None): class Cosine(Stationary): - R""" + r""" The Cosine kernel. .. math:: @@ -394,7 +413,7 @@ def full(self, X, Xs=None): class Linear(Covariance): - R""" + r""" The Linear kernel. .. math:: @@ -424,7 +443,7 @@ def diag(self, X): class Polynomial(Linear): - R""" + r""" The Polynomial kernel. .. math:: @@ -446,7 +465,7 @@ def diag(self, X): class WarpedInput(Covariance): - R""" + r""" Warp the inputs of any kernel using an arbitrary function defined using Theano. @@ -462,8 +481,7 @@ class WarpedInput(Covariance): Additional inputs (besides X or Xs) to warp_func. """ - def __init__(self, input_dim, cov_func, warp_func, args=None, - active_dims=None): + def __init__(self, input_dim, cov_func, warp_func, args=None, active_dims=None): super(WarpedInput, self).__init__(input_dim, active_dims) if not callable(warp_func): raise TypeError("warp_func must be callable") @@ -486,7 +504,7 @@ def diag(self, X): class Gibbs(Covariance): - R""" + r""" The Gibbs kernel. Use an arbitrary lengthscale function defined using Theano. Only tested in one dimension. @@ -503,17 +521,18 @@ class Gibbs(Covariance): Additional inputs (besides X or Xs) to lengthscale_func. """ - def __init__(self, input_dim, lengthscale_func, args=None, - active_dims=None): + def __init__(self, input_dim, lengthscale_func, args=None, active_dims=None): super(Gibbs, self).__init__(input_dim, active_dims) if active_dims is not None: if len(active_dims) > 1: - raise NotImplementedError(("Higher dimensional inputs ", - "are untested")) + raise NotImplementedError( + ("Higher dimensional inputs ", "are untested") + ) else: if input_dim != 1: - raise NotImplementedError(("Higher dimensional inputs ", - "are untested")) + raise NotImplementedError( + ("Higher dimensional inputs ", "are untested") + ) if not callable(lengthscale_func): raise TypeError("lengthscale_func must be callable") self.lfunc = handle_args(lengthscale_func, args) @@ -522,12 +541,14 @@ def __init__(self, input_dim, lengthscale_func, args=None, def square_dist(self, X, Xs=None): X2 = tt.sum(tt.square(X), 1) if Xs is None: - sqd = (-2.0 * tt.dot(X, tt.transpose(X)) - + (tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1)))) + sqd = -2.0 * tt.dot(X, tt.transpose(X)) + ( + tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1)) + ) else: Xs2 = tt.sum(tt.square(Xs), 1) - sqd = (-2.0 * tt.dot(X, tt.transpose(Xs)) - + (tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1)))) + sqd = -2.0 * tt.dot(X, tt.transpose(Xs)) + ( + tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1)) + ) return tt.clip(sqd, 0.0, np.inf) def full(self, X, Xs=None): @@ -541,15 +562,16 @@ def full(self, X, Xs=None): r2 = self.square_dist(X, Xs) rx2 = tt.reshape(tt.square(rx), (-1, 1)) rz2 = tt.reshape(tt.square(rz), (1, -1)) - return (tt.sqrt((2.0 * tt.outer(rx, rz)) / (rx2 + rz2)) - * tt.exp(-1.0 * r2 / (rx2 + rz2))) + return tt.sqrt((2.0 * tt.outer(rx, rz)) / (rx2 + rz2)) * tt.exp( + -1.0 * r2 / (rx2 + rz2) + ) def diag(self, X): return tt.alloc(1.0, X.shape[0]) class ScaledCov(Covariance): - R""" + r""" Construct a kernel by multiplying a base kernel with a scaling function defined using Theano. The scaling function is non-negative, and can be parameterized. @@ -566,6 +588,7 @@ class ScaledCov(Covariance): args : optional, tuple or list of scalars or PyMC3 variables Additional inputs (besides X or Xs) to lengthscale_func. """ + def __init__(self, input_dim, cov_func, scaling_func, args=None, active_dims=None): super(ScaledCov, self).__init__(input_dim, active_dims) if not callable(scaling_func): @@ -593,7 +616,7 @@ def full(self, X, Xs=None): class Coregion(Covariance): - R"""Covariance function for intrinsic/linear coregionalization models. + r"""Covariance function for intrinsic/linear coregionalization models. Adapted from GPy http://gpy.readthedocs.io/en/deploy/GPy.kern.src.html#GPy.kern.src.coregionalize.Coregionalize. This covariance has the form: @@ -628,10 +651,12 @@ class Coregion(Covariance): def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): super(Coregion, self).__init__(input_dim, active_dims) if len(self.active_dims) != 1: - raise ValueError('Coregion requires exactly one dimension to be active') + raise ValueError("Coregion requires exactly one dimension to be active") make_B = W is not None or kappa is not None if make_B and B is not None: - raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion') + raise ValueError( + "Exactly one of (W, kappa) and B must be provided to Coregion" + ) if make_B: self.W = tt.as_tensor_variable(W) self.kappa = tt.as_tensor_variable(kappa) @@ -639,20 +664,22 @@ def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): elif B is not None: self.B = tt.as_tensor_variable(B) else: - raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion') + raise ValueError( + "Exactly one of (W, kappa) and B must be provided to Coregion" + ) def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) - index = tt.cast(X, 'int32') + index = tt.cast(X, "int32") if Xs is None: index2 = index.T else: - index2 = tt.cast(Xs, 'int32').T + index2 = tt.cast(Xs, "int32").T return self.B[index, index2] def diag(self, X): X, _ = self._slice(X, None) - index = tt.cast(X, 'int32') + index = tt.cast(X, "int32") return tt.diag(self.B)[index.ravel()] @@ -664,6 +691,5 @@ def f(x, args): if not isinstance(args, tuple): args = (args,) return func(x, *args) - return f - + return f diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py index 522352ba12..ef906c2807 100644 --- a/pymc3/gp/gp.py +++ b/pymc3/gp/gp.py @@ -7,18 +7,23 @@ import pymc3 as pm from pymc3.gp.cov import Covariance, Constant from pymc3.gp.mean import Zero -from pymc3.gp.util import (conditioned_vars, infer_shape, - stabilize, cholesky, solve_lower, solve_upper) +from pymc3.gp.util import ( + conditioned_vars, + infer_shape, + stabilize, + cholesky, + solve_lower, + solve_upper, +) from pymc3.distributions import draw_values from theano.tensor.nlinalg import eigh -from ..math import (cartesian, kron_dot, kron_diag, - kron_solve_lower, kron_solve_upper) +from ..math import cartesian, kron_dot, kron_diag, kron_solve_lower, kron_solve_upper -__all__ = ['Latent', 'Marginal', 'TP', 'MarginalSparse', 'LatentKron', 'MarginalKron'] +__all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] class Base(object): - R""" + r""" Base class. """ @@ -49,7 +54,7 @@ def predict(self, Xnew, point=None, given=None, diag=False): @conditioned_vars(["X", "f"]) class Latent(Base): - R""" + r""" Latent Gaussian process. The `gp.Latent` class is a direct implementation of a GP. No addiive @@ -118,7 +123,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs): return f def prior(self, name, X, reparameterize=True, **kwargs): - R""" + r""" Returns the GP prior distribution evaluated over the input locations `X`. @@ -150,14 +155,14 @@ def prior(self, name, X, reparameterize=True, **kwargs): def _get_given_vals(self, given): if given is None: given = {} - if 'gp' in given: - cov_total = given['gp'].cov_func - mean_total = given['gp'].mean_func + if "gp" in given: + cov_total = given["gp"].cov_func + mean_total = given["gp"].mean_func else: cov_total = self.cov_func mean_total = self.mean_func - if all(val in given for val in ['X', 'f']): - X, f = given['X'], given['f'] + if all(val in given for val in ["X", "f"]): + X, f = given["X"], given["f"] else: X, f = self.X, self.f return X, f, cov_total, mean_total @@ -174,7 +179,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total): return mu, cov def conditional(self, name, Xnew, given=None, **kwargs): - R""" + r""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -240,7 +245,9 @@ class TP(Latent): def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), nu=None): if nu is None: - raise ValueError("Student's T process requires a degrees of freedom parameter, 'nu'") + raise ValueError( + "Student's T process requires a degrees of freedom parameter, 'nu'" + ) self.nu = nu super(TP, self).__init__(mean_func, cov_func) @@ -254,13 +261,15 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs): if reparameterize: chi2 = pm.ChiSquared("chi2_", self.nu) v = pm.Normal(name + "_rotated_", mu=0.0, sd=1.0, shape=shape, **kwargs) - f = pm.Deterministic(name, (tt.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))) + f = pm.Deterministic( + name, (tt.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v)) + ) else: f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, shape=shape, **kwargs) return f def prior(self, name, X, reparameterize=True, **kwargs): - R""" + r""" Returns the TP prior distribution evaluated over the input locations `X`. @@ -296,11 +305,11 @@ def _build_conditional(self, Xnew, X, f): mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), v) beta = tt.dot(v, v) nu2 = self.nu + X.shape[0] - covT = (self.nu + beta - 2)/(nu2 - 2) * cov + covT = (self.nu + beta - 2) / (nu2 - 2) * cov return nu2, mu, covT def conditional(self, name, Xnew, **kwargs): - R""" + r""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -328,7 +337,7 @@ def conditional(self, name, Xnew, **kwargs): @conditioned_vars(["X", "y", "noise"]) class Marginal(Base): - R""" + r""" Marginal Gaussian process. The `gp.Marginal` class is an implementation of the sum of a GP @@ -383,7 +392,7 @@ def _build_marginal_likelihood(self, X, noise): return mu, cov def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): - R""" + r""" Returns the marginal likelihood distribution, given the input locations `X` and the data `y`. @@ -430,22 +439,23 @@ def _get_given_vals(self, given): if given is None: given = {} - if 'gp' in given: - cov_total = given['gp'].cov_func - mean_total = given['gp'].mean_func + if "gp" in given: + cov_total = given["gp"].cov_func + mean_total = given["gp"].mean_func else: cov_total = self.cov_func mean_total = self.mean_func - if all(val in given for val in ['X', 'y', 'noise']): - X, y, noise = given['X'], given['y'], given['noise'] + if all(val in given for val in ["X", "y", "noise"]): + X, y, noise = given["X"], given["y"], given["noise"] if not isinstance(noise, Covariance): noise = pm.gp.cov.WhiteNoise(noise) else: X, y, noise = self.X, self.y, self.noise return X, y, noise, cov_total, mean_total - def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, - cov_total, mean_total): + def _build_conditional( + self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total + ): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) Knx = noise(X) @@ -468,7 +478,7 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, return mu, cov if pred_noise else stabilize(cov) def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): - R""" + r""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -506,7 +516,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): return pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): - R""" + r""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP estimate or a sample from a `trace`. @@ -534,7 +544,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): return draw_values([mu, cov], point=point) def predictt(self, Xnew, diag=False, pred_noise=False, given=None): - R""" + r""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -559,7 +569,7 @@ def predictt(self, Xnew, diag=False, pred_noise=False, given=None): @conditioned_vars(["X", "Xu", "y", "sigma"]) class MarginalSparse(Marginal): - R""" + r""" Approximate marginal Gaussian process. The `gp.MarginalSparse` class is an implementation of the sum of a GP @@ -653,9 +663,9 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): trace = 0.0 elif self.approx == "VFE": Lamd = tt.ones_like(Qffd) * sigma2 - trace = ((1.0 / (2.0 * sigma2)) * - (tt.sum(self.cov_func(X, diag=True)) - - tt.sum(tt.sum(A * A, 0)))) + trace = (1.0 / (2.0 * sigma2)) * ( + tt.sum(self.cov_func(X, diag=True)) - tt.sum(tt.sum(A * A, 0)) + ) else: # DTC Lamd = tt.ones_like(Qffd) * sigma2 trace = 0.0 @@ -669,8 +679,10 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) - def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kwargs): - R""" + def marginal_likelihood( + self, name, X, Xu, y, noise=None, is_observed=True, **kwargs + ): + r""" Returns the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise standard deviations `sigma`. @@ -701,25 +713,29 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw self.Xu = Xu self.y = y if noise is None: - sigma = kwargs.get('sigma') + sigma = kwargs.get("sigma") if sigma is None: - raise ValueError('noise argument must be specified') + raise ValueError("noise argument must be specified") else: self.sigma = sigma warnings.warn( "The 'sigma' argument has been deprecated. Use 'noise' instead.", - DeprecationWarning) + DeprecationWarning, + ) else: self.sigma = noise - logp = functools.partial(self._build_marginal_likelihood_logp, - X=X, Xu=Xu, sigma=noise) + logp = functools.partial( + self._build_marginal_likelihood_logp, X=X, Xu=Xu, sigma=noise + ) if is_observed: return pm.DensityDist(name, logp, observed=y, **kwargs) else: shape = infer_shape(X, kwargs.pop("shape", None)) return pm.DensityDist(name, logp, shape=shape, **kwargs) - def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total): + def _build_conditional( + self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total + ): sigma2 = tt.square(sigma) Kuu = cov_total(Xu) Kuf = cov_total(Xu, X) @@ -738,7 +754,9 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, c = solve_lower(L_B, tt.dot(A, r_l)) Kus = self.cov_func(Xu, Xnew) As = solve_lower(Luu, Kus) - mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c)) + mu = self.mean_func(Xnew) + tt.dot( + tt.transpose(As), solve_upper(tt.transpose(L_B), c) + ) C = solve_lower(L_B, As) if diag: Kss = self.cov_func(Xnew, diag=True) @@ -747,8 +765,11 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, var += sigma2 return mu, var else: - cov = (self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) + - tt.dot(tt.transpose(C), C)) + cov = ( + self.cov_func(Xnew) + - tt.dot(tt.transpose(As), As) + + tt.dot(tt.transpose(C), C) + ) if pred_noise: cov += sigma2 * tt.identity_like(cov) return mu, cov if pred_noise else stabilize(cov) @@ -756,20 +777,20 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, def _get_given_vals(self, given): if given is None: given = {} - if 'gp' in given: - cov_total = given['gp'].cov_func - mean_total = given['gp'].mean_func + if "gp" in given: + cov_total = given["gp"].cov_func + mean_total = given["gp"].mean_func else: cov_total = self.cov_func mean_total = self.mean_func - if all(val in given for val in ['X', 'Xu', 'y', 'sigma']): - X, Xu, y, sigma = given['X'], given['Xu'], given['y'], given['sigma'] + if all(val in given for val in ["X", "Xu", "y", "sigma"]): + X, Xu, y, sigma = given["X"], given["Xu"], given["y"], given["sigma"] else: X, Xu, y, sigma = self.X, self.Xu, self.y, self.sigma return X, Xu, y, sigma, cov_total, mean_total def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): - R""" + r""" Returns the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. @@ -800,7 +821,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): @conditioned_vars(["Xs", "f"]) class LatentKron(Base): - R""" + r""" Latent Gaussian process whose covariance is a tensor product kernel. The `gp.LatentKron` class is a direct implementation of a GP with a @@ -861,7 +882,7 @@ def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): super(LatentKron, self).__init__(mean_func, cov_func) def __add__(self, other): - raise TypeError('Additive, Kronecker-structured processes not implemented') + raise TypeError("Additive, Kronecker-structured processes not implemented") def _build_prior(self, name, Xs, **kwargs): self.N = np.prod([len(X) for X in Xs]) @@ -891,7 +912,7 @@ def prior(self, name, Xs, **kwargs): distribution constructor. """ if len(Xs) != len(self.cov_funcs): - raise ValueError('Must provide a covariance function for each X') + raise ValueError("Must provide a covariance function for each X") f = self._build_prior(name, Xs, **kwargs) self.Xs = Xs self.f = f @@ -952,7 +973,7 @@ def conditional(self, name, Xnew, **kwargs): @conditioned_vars(["Xs", "y", "sigma"]) class MarginalKron(Base): - R""" + r""" Marginal Gaussian process whose covariance is a tensor product kernel. The `gp.MarginalKron` class is an implementation of the sum of a @@ -1017,7 +1038,7 @@ def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): super(MarginalKron, self).__init__(mean_func, cov_func) def __add__(self, other): - raise TypeError('Additive, Kronecker-structured processes not implemented') + raise TypeError("Additive, Kronecker-structured processes not implemented") def _build_marginal_likelihood(self, Xs): self.X = cartesian(*Xs) @@ -1028,10 +1049,14 @@ def _build_marginal_likelihood(self, Xs): def _check_inputs(self, Xs, y): N = np.prod([len(X) for X in Xs]) if len(Xs) != len(self.cov_funcs): - raise ValueError('Must provide a covariance function for each X') + raise ValueError("Must provide a covariance function for each X") if N != len(y): - raise ValueError(('Length of y ({}) must match length of cartesian' - 'cartesian product of Xs ({})').format(len(y), N)) + raise ValueError( + ( + "Length of y ({}) must match length of cartesian" + "cartesian product of Xs ({})" + ).format(len(y), N) + ) def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): """ @@ -1065,12 +1090,14 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): self.y = y self.sigma = sigma if is_observed: - return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, - observed=y, **kwargs) + return pm.KroneckerNormal( + name, mu=mu, covs=covs, sigma=sigma, observed=y, **kwargs + ) else: shape = np.prod([len(X) for X in Xs]) - return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, - shape=shape, **kwargs) + return pm.KroneckerNormal( + name, mu=mu, covs=covs, sigma=sigma, shape=shape, **kwargs + ) def _build_conditional(self, Xnew, pred_noise, diag): Xs, y, sigma = self.Xs, self.y, self.sigma @@ -1083,7 +1110,7 @@ def _build_conditional(self, Xnew, pred_noise, diag): QTs = list(map(tt.transpose, Qs)) eigs = kron_diag(*eigs_sep) # Combine separate eigs if sigma is not None: - eigs += sigma**2 + eigs += sigma ** 2 # New points Km = self.cov_func(Xnew, diag=diag) @@ -1092,13 +1119,13 @@ def _build_conditional(self, Xnew, pred_noise, diag): # Build conditional mu alpha = kron_dot(QTs, delta) - alpha = alpha/eigs[:, None] + alpha = alpha / eigs[:, None] alpha = kron_dot(Qs, alpha) mu = tt.dot(Kmn, alpha).ravel() + self.mean_func(Xnew) # Build conditional cov A = kron_dot(QTs, Knm) - A = A/tt.sqrt(eigs[:, None]) + A = A / tt.sqrt(eigs[:, None]) if diag: Asq = tt.sum(tt.square(A), 0) cov = Km - Asq @@ -1150,7 +1177,7 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): return pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False): - R""" + r""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP estimate or a sample from a `trace`. @@ -1173,7 +1200,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False): return draw_values([mu, cov], point=point) def predictt(self, Xnew, diag=False, pred_noise=False): - R""" + r""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. diff --git a/pymc3/gp/mean.py b/pymc3/gp/mean.py index 638cc6d429..5d9ce1b206 100644 --- a/pymc3/gp/mean.py +++ b/pymc3/gp/mean.py @@ -1,15 +1,15 @@ import theano.tensor as tt -__all__ = ['Zero', 'Constant', 'Linear'] +__all__ = ["Zero", "Constant", "Linear"] class Mean(object): - R""" + r""" Base class for mean functions """ def __call__(self, X): - R""" + r""" Evaluate the mean function. Parameters @@ -26,7 +26,7 @@ def __mul__(self, other): class Zero(Mean): - R""" + r""" Zero mean function for Gaussian process. """ @@ -34,8 +34,9 @@ class Zero(Mean): def __call__(self, X): return tt.alloc(0.0, X.shape[0]) + class Constant(Mean): - R""" + r""" Constant mean function for Gaussian process. Parameters @@ -53,7 +54,7 @@ def __call__(self, X): class Linear(Mean): - R""" + r""" Linear mean function for Gaussian process. Parameters @@ -91,4 +92,3 @@ def __init__(self, first_mean, second_mean): def __call__(self, X): return tt.mul(self.m1(X), self.m2(X)) - diff --git a/pymc3/gp/util.py b/pymc3/gp/util.py index 92fe23f583..7135b4fca2 100644 --- a/pymc3/gp/util.py +++ b/pymc3/gp/util.py @@ -3,9 +3,9 @@ import theano.tensor as tt cholesky = tt.slinalg.cholesky -solve_lower = tt.slinalg.Solve(A_structure='lower_triangular') -solve_upper = tt.slinalg.Solve(A_structure='upper_triangular') -solve = tt.slinalg.Solve(A_structure='general') +solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") +solve_upper = tt.slinalg.Solve(A_structure="upper_triangular") +solve = tt.slinalg.Solve(A_structure="general") def infer_shape(X, n_points=None): @@ -29,10 +29,14 @@ def kmeans_inducing_points(n_inducing, X): elif isinstance(X, (np.ndarray, tuple, list)): X = np.asarray(X) else: - raise TypeError(("To use K-means initialization, " - "please provide X as a type that " - "can be cast to np.ndarray, instead " - "of {}".format(type(X)))) + raise TypeError( + ( + "To use K-means initialization, " + "please provide X as a type that " + "can be cast to np.ndarray, instead " + "of {}".format(type(X)) + ) + ) scaling = np.std(X, 0) # if std of a column is very small (zero), don't normalize that column scaling[scaling <= 1e-6] = 1.0 @@ -43,33 +47,51 @@ def kmeans_inducing_points(n_inducing, X): def conditioned_vars(varnames): """ Decorator for validating attrs that are conditioned on. """ + def gp_wrapper(cls): def make_getter(name): def getter(self): value = getattr(self, name, None) if value is None: - raise AttributeError(("'{}' not set. Provide as argument " - "to condition, or call 'prior' " - "first".format(name.lstrip("_")))) + raise AttributeError( + ( + "'{}' not set. Provide as argument " + "to condition, or call 'prior' " + "first".format(name.lstrip("_")) + ) + ) else: return value return getattr(self, name) + return getter def make_setter(name): def setter(self, val): setattr(self, name, val) + return setter for name in varnames: - getter = make_getter('_' + name) - setter = make_setter('_' + name) + getter = make_getter("_" + name) + setter = make_setter("_" + name) setattr(cls, name, property(getter, setter)) return cls + return gp_wrapper -def plot_gp_dist(ax, samples, x, plot_samples=True, palette="Reds", fill_alpha=0.8, samples_alpha=0.1, fill_kwargs=None, samples_kwargs=None): +def plot_gp_dist( + ax, + samples, + x, + plot_samples=True, + palette="Reds", + fill_alpha=0.8, + samples_alpha=0.1, + fill_kwargs=None, + samples_kwargs=None, +): """ A helper function for plotting 1D GP posteriors from trace Parameters @@ -112,13 +134,21 @@ def plot_gp_dist(ax, samples, x, plot_samples=True, palette="Reds", fill_alpha=0 x = x.flatten() for i, p in enumerate(percs[::-1]): upper = np.percentile(samples, p, axis=1) - lower = np.percentile(samples, 100-p, axis=1) + lower = np.percentile(samples, 100 - p, axis=1) color_val = colors[i] - ax.fill_between(x, upper, lower, color=cmap(color_val), alpha=fill_alpha, **fill_kwargs) + ax.fill_between( + x, upper, lower, color=cmap(color_val), alpha=fill_alpha, **fill_kwargs + ) if plot_samples: # plot a few samples idx = np.random.randint(0, samples.shape[1], 30) - ax.plot(x, samples[:,idx], color=cmap(0.9), lw=1, alpha=samples_alpha, - **samples_kwargs) + ax.plot( + x, + samples[:, idx], + color=cmap(0.9), + lw=1, + alpha=samples_alpha, + **samples_kwargs + ) return ax diff --git a/pymc3/math.py b/pymc3/math.py index 25a057f724..6edfbe05fe 100644 --- a/pymc3/math.py +++ b/pymc3/math.py @@ -1,13 +1,51 @@ from __future__ import division import sys import theano.tensor as tt + # pylint: disable=unused-import import theano from theano.tensor import ( - constant, flatten, zeros_like, ones_like, stack, concatenate, sum, prod, - lt, gt, le, ge, eq, neq, switch, clip, where, and_, or_, abs_, exp, log, - cos, sin, tan, cosh, sinh, tanh, sqr, sqrt, erf, erfc, erfinv, erfcinv, dot, - maximum, minimum, sgn, ceil, floor) + constant, + flatten, + zeros_like, + ones_like, + stack, + concatenate, + sum, + prod, + lt, + gt, + le, + ge, + eq, + neq, + switch, + clip, + where, + and_, + or_, + abs_, + exp, + log, + cos, + sin, + tan, + cosh, + sinh, + tanh, + sqr, + sqrt, + erf, + erfc, + erfinv, + erfcinv, + dot, + maximum, + minimum, + sgn, + ceil, + floor, +) from theano.tensor.nlinalg import det, matrix_inverse, extract_diag, matrix_dot, trace import theano.tensor.slinalg import theano.sparse @@ -43,7 +81,7 @@ def cartesian(*arrays): 1D arrays where earlier arrays loop more slowly than later ones """ N = len(arrays) - return np.stack(np.meshgrid(*arrays, indexing='ij'), -1).reshape(-1, N) + return np.stack(np.meshgrid(*arrays, indexing="ij"), -1).reshape(-1, N) def kron_matrix_op(krons, m, op): @@ -58,6 +96,7 @@ def kron_matrix_op(krons, m, op): m : NxM array or 1D array (treated as Nx1) Object that krons act upon """ + def flat_matrix_op(flat_mat, mat): Nmat = mat.shape[1] flat_shape = flat_mat.shape @@ -70,7 +109,7 @@ def kron_vector_op(v): if m.ndim == 1: m = m[:, None] # Treat 1D array as Nx1 matrix if m.ndim != 2: # Has not been tested otherwise - raise ValueError('m must have ndim <= 2, not {}'.format(mat.ndim)) + raise ValueError("m must have ndim <= 2, not {}".format(mat.ndim)) res = kron_vector_op(m) res_shape = res.shape return tt.reshape(res, (res_shape[1], res_shape[0])).T @@ -81,6 +120,7 @@ def kron_vector_op(v): kron_solve_lower = partial(kron_matrix_op, op=tt.slinalg.solve_lower_triangular) kron_solve_upper = partial(kron_matrix_op, op=tt.slinalg.solve_upper_triangular) + def flat_outer(a, b): return tt.outer(a, b).ravel() @@ -101,7 +141,7 @@ def tround(*args, **kwargs): Temporary function to silence round warning in Theano. Please remove when the warning disappears. """ - kwargs['mode'] = 'half_to_even' + kwargs["mode"] = "half_to_even" return tt.round(*args, **kwargs) @@ -113,9 +153,7 @@ def logsumexp(x, axis=None): def logaddexp(a, b): diff = b - a - return tt.switch(diff > 0, - b + tt.log1p(tt.exp(-diff)), - a + tt.log1p(tt.exp(diff))) + return tt.switch(diff > 0, b + tt.log1p(tt.exp(-diff)), a + tt.log1p(tt.exp(diff))) def logdiffexp(a, b): @@ -125,7 +163,7 @@ def logdiffexp(a, b): def invlogit(x, eps=sys.float_info.epsilon): """The inverse of the logit function, 1 / (1 + exp(-x)).""" - return (1. - 2. * eps) / (1. + tt.exp(-x)) + eps + return (1.0 - 2.0 * eps) / (1.0 + tt.exp(-x)) + eps def logit(p): @@ -148,10 +186,7 @@ def log1mexp(x): For details, see https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf """ - return tt.switch( - tt.lt(x, 0.683), - tt.log(-tt.expm1(-x)), - tt.log1p(-tt.exp(-x))) + return tt.switch(tt.lt(x, 0.683), tt.log(-tt.expm1(-x)), tt.log1p(-tt.exp(-x))) def flatten_list(tensors): @@ -168,6 +203,7 @@ class LogDet(Op): Once PR #3959 (https://github.com/Theano/Theano/pull/3959/) by harpone is merged, this must be removed. """ + def make_node(self, x): x = theano.tensor.as_tensor_variable(x) o = theano.tensor.scalar(dtype=x.dtype) @@ -181,7 +217,7 @@ def perform(self, node, inputs, outputs, params=None): log_det = np.sum(np.log(np.abs(s))) z[0] = np.asarray(log_det, dtype=x.dtype) except Exception: - print('Failed to compute logdet of {}.'.format(x)) + print("Failed to compute logdet of {}.".format(x)) raise def grad(self, inputs, g_outputs): @@ -192,19 +228,20 @@ def grad(self, inputs, g_outputs): def __str__(self): return "LogDet" + logdet = LogDet() def probit(p): - return -sqrt(2.) * erfcinv(2. * p) + return -sqrt(2.0) * erfcinv(2.0 * p) def invprobit(x): - return .5 * erfc(-x / sqrt(2.)) + return 0.5 * erfc(-x / sqrt(2.0)) def expand_packed_triangular(n, packed, lower=True, diagonal_only=False): - R"""Convert a packed triangular matrix into a two dimensional array. + r"""Convert a packed triangular matrix into a two dimensional array. Triangular matrices can be stored with better space efficiancy by storing the non-zero values in a one-dimensional array. We number @@ -227,9 +264,9 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False): If true, return only the diagonal of the matrix. """ if packed.ndim != 1: - raise ValueError('Packed triagular is not one dimensional.') + raise ValueError("Packed triagular is not one dimensional.") if not isinstance(n, int): - raise TypeError('n must be an integer') + raise TypeError("n must be an integer") if diagonal_only and lower: diag_idxs = np.arange(1, n + 1).cumsum() - 1 @@ -251,12 +288,13 @@ class BatchedDiag(tt.Op): """ Fast BatchedDiag allocation """ + __props__ = () def make_node(self, diag): diag = tt.as_tensor_variable(diag) if diag.type.ndim != 2: - raise TypeError('data argument must be a matrix', diag.type) + raise TypeError("data argument must be a matrix", diag.type) return tt.Apply(self, [diag], [tt.tensor3(dtype=diag.dtype)]) @@ -278,7 +316,7 @@ def grad(self, inputs, gout): return [gz[..., idx, idx]] def infer_shape(self, nodes, shapes): - return [(shapes[0][0], ) + (shapes[0][1],) * 2] + return [(shapes[0][0],) + (shapes[0][1],) * 2] def batched_diag(C): @@ -292,26 +330,30 @@ def batched_diag(C): idx = tt.arange(dim) return C[..., idx, idx] else: - raise ValueError('Input should be 2 or 3 dimensional') + raise ValueError("Input should be 2 or 3 dimensional") class BlockDiagonalMatrix(Op): - __props__ = ('sparse', 'format') + __props__ = ("sparse", "format") - def __init__(self, sparse=False, format='csr'): - if format not in ('csr', 'csc'): - raise ValueError("format must be one of: 'csr', 'csc', got {}".format(format)) + def __init__(self, sparse=False, format="csr"): + if format not in ("csr", "csc"): + raise ValueError( + "format must be one of: 'csr', 'csc', got {}".format(format) + ) self.sparse = sparse self.format = format def make_node(self, *matrices): if not matrices: - raise ValueError('no matrices to allocate') + raise ValueError("no matrices to allocate") matrices = list(map(tt.as_tensor, matrices)) if any(mat.type.ndim != 2 for mat in matrices): - raise TypeError('all data arguments must be matrices') + raise TypeError("all data arguments must be matrices") if self.sparse: - out_type = theano.sparse.matrix(self.format, dtype=largest_common_dtype(matrices)) + out_type = theano.sparse.matrix( + self.format, dtype=largest_common_dtype(matrices) + ) else: out_type = theano.tensor.matrix(dtype=largest_common_dtype(matrices)) return tt.Apply(self, matrices, [out_type]) @@ -319,9 +361,7 @@ def make_node(self, *matrices): def perform(self, node, inputs, output_storage, params=None): dtype = largest_common_dtype(inputs) if self.sparse: - output_storage[0][0] = sp.sparse.block_diag( - inputs, self.format, dtype - ) + output_storage[0][0] = sp.sparse.block_diag(inputs, self.format, dtype) else: output_storage[0][0] = scipy_block_diag(*inputs).astype(dtype) @@ -329,9 +369,13 @@ def grad(self, inputs, gout): shapes = tt.stack([i.shape for i in inputs]) index_end = shapes.cumsum(0) index_begin = index_end - shapes - slices = [ix_(tt.arange(index_begin[i, 0], index_end[i, 0]), - tt.arange(index_begin[i, 1], index_end[i, 1]) - ) for i in range(len(inputs))] + slices = [ + ix_( + tt.arange(index_begin[i, 0], index_end[i, 0]), + tt.arange(index_begin[i, 1], index_end[i, 1]), + ) + for i in range(len(inputs)) + ] return [gout[0][slc] for slc in slices] def infer_shape(self, nodes, shapes): @@ -339,7 +383,7 @@ def infer_shape(self, nodes, shapes): return [(tt.add(*first), tt.add(*second))] -def block_diagonal(matrices, sparse=False, format='csr'): +def block_diagonal(matrices, sparse=False, format="csr"): r"""See scipy.sparse.block_diag or scipy.linalg.block_diag for reference diff --git a/pymc3/memoize.py b/pymc3/memoize.py index 48cf73be4f..c5541e4cec 100644 --- a/pymc3/memoize.py +++ b/pymc3/memoize.py @@ -2,6 +2,7 @@ import pickle import collections from .util import biwrap + CACHE_REGISTRY = [] @@ -23,14 +24,15 @@ def memoizer(*args, **kwargs): else: # bound methods have self as first argument, remove it to compute key key = (hashable(args[1:]), hashable(kwargs)) - if not hasattr(args[0], '_cache'): - setattr(args[0], '_cache', collections.defaultdict(dict)) + if not hasattr(args[0], "_cache"): + setattr(args[0], "_cache", collections.defaultdict(dict)) # do not add to cache regestry - cache = getattr(args[0], '_cache')[obj.__name__] + cache = getattr(args[0], "_cache")[obj.__name__] if key not in cache: cache[key] = obj(*args, **kwargs) return cache[key] + return memoizer @@ -40,7 +42,7 @@ def clear_cache(obj=None): c.clear() else: if isinstance(obj, WithMemoization): - for v in getattr(obj, '_cache', {}).values(): + for v in getattr(obj, "_cache", {}).values(): v.clear() else: obj.cache.clear() @@ -52,7 +54,7 @@ def __hash__(self): def __getstate__(self): state = self.__dict__.copy() - state.pop('_cache', None) + state.pop("_cache", None) return state def __setstate__(self, state): @@ -73,7 +75,7 @@ def hashable(a): try: return hash(pickle.dumps(a)) except Exception: - if hasattr(a, '__dict__'): + if hasattr(a, "__dict__"): return hashable(a.__dict__) else: return id(a) diff --git a/pymc3/model.py b/pymc3/model.py index 06284e8ee8..f7fdd9be13 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -21,11 +21,18 @@ from .util import get_transformed_name __all__ = [ - 'Model', 'Factor', 'compilef', 'fn', 'fastfn', 'modelcontext', - 'Point', 'Deterministic', 'Potential' + "Model", + "Factor", + "compilef", + "fn", + "fastfn", + "modelcontext", + "Point", + "Deterministic", + "Potential", ] -FlatView = collections.namedtuple('FlatView', 'input, replacements, view') +FlatView = collections.namedtuple("FlatView", "input, replacements, view") class InstanceMethod(object): @@ -42,8 +49,9 @@ def __call__(self, *args, **kwargs): return getattr(self.obj, self.method_name)(*args, **kwargs) -def incorporate_methods(source, destination, methods, default=None, - wrapper=None, override=False): +def incorporate_methods( + source, destination, methods, default=None, wrapper=None, override=False +): """ Add attributes to a destination object which points to methods from from a source object. @@ -68,9 +76,11 @@ def incorporate_methods(source, destination, methods, default=None, """ for method in methods: if hasattr(destination, method) and not override: - raise AttributeError("Cannot add method {!r}".format(method) + - "to destination object as it already exists. " - "To prevent this error set 'override=True'.") + raise AttributeError( + "Cannot add method {!r}".format(method) + + "to destination object as it already exists. " + "To prevent this error set 'override=True'." + ) if hasattr(source, method): if wrapper is None: setattr(destination, method, getattr(source, method)) @@ -79,6 +89,7 @@ def incorporate_methods(source, destination, methods, default=None, else: setattr(destination, method, None) + def get_named_nodes_and_relations(graph): """Get the named nodes in a theano graph (i.e., nodes whose name attribute is not None) along with their relationships (i.e., the @@ -110,9 +121,11 @@ def get_named_nodes_and_relations(graph): node_children = {} return _get_named_nodes_and_relations(graph, None, {}, node_parents, node_children) -def _get_named_nodes_and_relations(graph, parent, leaf_nodes, - node_parents, node_children): - if getattr(graph, 'owner', None) is None: # Leaf node + +def _get_named_nodes_and_relations( + graph, parent, leaf_nodes, node_parents, node_children +): + if getattr(graph, "owner", None) is None: # Leaf node if graph.name is not None: # Named leaf node leaf_nodes.update({graph.name: graph}) if parent is not None: # Is None for the root node @@ -137,9 +150,9 @@ def _get_named_nodes_and_relations(graph, parent, leaf_nodes, # Init the nodes children to an empty set node_children[graph] = set() for i in graph.owner.inputs: - temp_nodes, temp_inter, temp_tree = \ - _get_named_nodes_and_relations(i, parent, leaf_nodes, - node_parents, node_children) + temp_nodes, temp_inter, temp_tree = _get_named_nodes_and_relations( + i, parent, leaf_nodes, node_parents, node_children + ) leaf_nodes.update(temp_nodes) node_parents.update(temp_inter) node_children.update(temp_tree) @@ -150,26 +163,27 @@ class Context(object): """Functionality for objects that put themselves in a context using the `with` statement. """ + contexts = threading.local() def __enter__(self): type(self).get_contexts().append(self) # self._theano_config is set in Model.__new__ - if hasattr(self, '_theano_config'): + if hasattr(self, "_theano_config"): self._old_theano_config = set_theano_conf(self._theano_config) return self def __exit__(self, typ, value, traceback): type(self).get_contexts().pop() # self._theano_config is set in Model.__new__ - if hasattr(self, '_old_theano_config'): + if hasattr(self, "_old_theano_config"): set_theano_conf(self._old_theano_config) @classmethod def get_contexts(cls): # no race-condition here, cls.contexts is a thread-local object # be sure not to override contexts in a subclass however! - if not hasattr(cls.contexts, 'stack'): + if not hasattr(cls.contexts, "stack"): cls.contexts.stack = [] return cls.contexts.stack @@ -195,6 +209,7 @@ class Factor(object): """Common functionality for objects with a log probability density associated with them. """ + def __init__(self, *args, **kwargs): super(Factor, self).__init__(*args, **kwargs) @@ -255,28 +270,29 @@ def fastd2logp_nojac(self, vars=None): @property def logpt(self): """Theano scalar of log-probability of the model""" - if getattr(self, 'total_size', None) is not None: + if getattr(self, "total_size", None) is not None: logp = self.logp_sum_unscaledt * self.scaling else: logp = self.logp_sum_unscaledt if self.name is not None: - logp.name = '__logp_%s' % self.name + logp.name = "__logp_%s" % self.name return logp @property def logp_nojact(self): """Theano scalar of log-probability, excluding jacobian terms.""" - if getattr(self, 'total_size', None) is not None: + if getattr(self, "total_size", None) is not None: logp = tt.sum(self.logp_nojac_unscaledt) * self.scaling else: logp = tt.sum(self.logp_nojac_unscaledt) if self.name is not None: - logp.name = '__logp_%s' % self.name + logp.name = "__logp_%s" % self.name return logp class InitContextMeta(type): """Metaclass that executes `__init__` of instance in it's context""" + def __call__(cls, *args, **kwargs): instance = cls.__new__(cls, *args, **kwargs) with instance: # appends context @@ -286,11 +302,13 @@ def __call__(cls, *args, **kwargs): def withparent(meth): """Helper wrapper that passes calls to parent's instance""" + def wrapped(self, *args, **kwargs): res = meth(self, *args, **kwargs) - if getattr(self, 'parent', None) is not None: + if getattr(self, "parent", None) is not None: getattr(self.parent, meth.__name__)(*args, **kwargs) return res + # Unfortunately functools wrapper fails # when decorating built-in methods so we # need to fix that improper behaviour @@ -303,12 +321,14 @@ class treelist(list): to parent list instance. Extending treelist you will also extend its parent """ + def __init__(self, iterable=(), parent=None): super(treelist, self).__init__(iterable) assert isinstance(parent, list) or parent is None self.parent = parent if self.parent is not None: self.parent.extend(self) + # typechecking here works bad append = withparent(list.append) __iadd__ = withparent(list.__iadd__) @@ -316,18 +336,18 @@ def __init__(self, iterable=(), parent=None): def tree_contains(self, item): if isinstance(self.parent, treedict): - return (list.__contains__(self, item) or - self.parent.tree_contains(item)) + return list.__contains__(self, item) or self.parent.tree_contains(item) elif isinstance(self.parent, list): - return (list.__contains__(self, item) or - self.parent.__contains__(item)) + return list.__contains__(self, item) or self.parent.__contains__(item) else: return list.__contains__(self, item) def __setitem__(self, key, value): - raise NotImplementedError('Method is removed as we are not' - ' able to determine ' - 'appropriate logic for it') + raise NotImplementedError( + "Method is removed as we are not" + " able to determine " + "appropriate logic for it" + ) def __imul__(self, other): t0 = len(self) @@ -341,12 +361,14 @@ class treedict(dict): to parent dict instance. Extending treedict you will also extend its parent """ + def __init__(self, iterable=(), parent=None, **kwargs): super(treedict, self).__init__(iterable, **kwargs) assert isinstance(parent, dict) or parent is None self.parent = parent if self.parent is not None: self.parent.update(self) + # typechecking here works bad __setitem__ = withparent(dict.__setitem__) update = withparent(dict.update) @@ -354,11 +376,9 @@ def __init__(self, iterable=(), parent=None, **kwargs): def tree_contains(self, item): # needed for `add_random_variable` method if isinstance(self.parent, treedict): - return (dict.__contains__(self, item) or - self.parent.tree_contains(item)) + return dict.__contains__(self, item) or self.parent.tree_contains(item) elif isinstance(self.parent, dict): - return (dict.__contains__(self, item) or - self.parent.__contains__(item)) + return dict.__contains__(self, item) or self.parent.__contains__(item) else: return dict.__contains__(self, item) @@ -395,19 +415,21 @@ class ValueGradFunction(object): gradient. This is None unless `profile=True` was set in the kwargs. """ - def __init__(self, cost, grad_vars, extra_vars=None, dtype=None, - casting='no', **kwargs): + + def __init__( + self, cost, grad_vars, extra_vars=None, dtype=None, casting="no", **kwargs + ): if extra_vars is None: extra_vars = [] names = [arg.name for arg in grad_vars + extra_vars] if any(name is None for name in names): - raise ValueError('Arguments must be named.') + raise ValueError("Arguments must be named.") if len(set(names)) != len(names): - raise ValueError('Names of the arguments are not unique.') + raise ValueError("Names of the arguments are not unique.") if cost.ndim > 0: - raise ValueError('Cost must be a scalar.') + raise ValueError("Cost must be a scalar.") self._grad_vars = grad_vars self._extra_vars = extra_vars @@ -421,31 +443,35 @@ def __init__(self, cost, grad_vars, extra_vars=None, dtype=None, self.dtype = dtype for var in self._grad_vars: if not np.can_cast(var.dtype, self.dtype, casting): - raise TypeError('Invalid dtype for variable %s. Can not ' - 'cast to %s with casting rule %s.' - % (var.name, self.dtype, casting)) + raise TypeError( + "Invalid dtype for variable %s. Can not " + "cast to %s with casting rule %s." % (var.name, self.dtype, casting) + ) if not np.issubdtype(var.dtype, np.floating): - raise TypeError('Invalid dtype for variable %s. Must be ' - 'floating point but is %s.' - % (var.name, var.dtype)) + raise TypeError( + "Invalid dtype for variable %s. Must be " + "floating point but is %s." % (var.name, var.dtype) + ) givens = [] self._extra_vars_shared = {} for var in extra_vars: - shared = theano.shared(var.tag.test_value, var.name + '_shared__') + shared = theano.shared(var.tag.test_value, var.name + "_shared__") self._extra_vars_shared[var.name] = shared givens.append((var, shared)) self._vars_joined, self._cost_joined = self._build_joined( - self._cost, grad_vars, self._ordering.vmap) + self._cost, grad_vars, self._ordering.vmap + ) grad = tt.grad(self._cost_joined, self._vars_joined) - grad.name = '__grad' + grad.name = "__grad" inputs = [self._vars_joined] self._theano_function = theano.function( - inputs, [self._cost_joined, grad], givens=givens, **kwargs) + inputs, [self._cost_joined, grad], givens=givens, **kwargs + ) def set_extra_values(self, extra_vars): self._extra_are_set = True @@ -454,21 +480,25 @@ def set_extra_values(self, extra_vars): def get_extra_values(self): if not self._extra_are_set: - raise ValueError('Extra values are not set.') + raise ValueError("Extra values are not set.") - return {var.name: self._extra_vars_shared[var.name].get_value() - for var in self._extra_vars} + return { + var.name: self._extra_vars_shared[var.name].get_value() + for var in self._extra_vars + } def __call__(self, array, grad_out=None, extra_vars=None): if extra_vars is not None: self.set_extra_values(extra_vars) if not self._extra_are_set: - raise ValueError('Extra values are not set.') + raise ValueError("Extra values are not set.") if array.shape != (self.size,): - raise ValueError('Invalid shape for array. Must be %s but is %s.' - % ((self.size,), array.shape)) + raise ValueError( + "Invalid shape for array. Must be %s but is %s." + % ((self.size,), array.shape) + ) if grad_out is None: out = np.empty_like(array) @@ -497,11 +527,14 @@ def dict_to_array(self, point): def array_to_dict(self, array): """Convert an array to a dictionary containing the grad_vars.""" if array.shape != (self.size,): - raise ValueError('Array should have shape (%s,) but has %s' - % (self.size, array.shape)) + raise ValueError( + "Array should have shape (%s,) but has %s" % (self.size, array.shape) + ) if array.dtype != self.dtype: - raise ValueError('Array has invalid dtype. Should be %s but is %s' - % (self._dtype, self.dtype)) + raise ValueError( + "Array has invalid dtype. Should be %s but is %s" + % (self._dtype, self.dtype) + ) point = {} for varmap in self._ordering.vmap: data = array[varmap.slc].reshape(varmap.shp) @@ -517,7 +550,7 @@ def array_to_full_dict(self, array): return point def _build_joined(self, cost, args, vmap): - args_joined = tt.vector('__args_joined') + args_joined = tt.vector("__args_joined") args_joined.tag.test_value = np.zeros(self.size, dtype=self.dtype) joined_slices = {} @@ -618,22 +651,23 @@ def __init__(self, mean=0, sd=1, name='', model=None): CustomModel(mean=1, name='first') CustomModel(mean=2, name='second') """ + def __new__(cls, *args, **kwargs): # resolves the parent instance instance = super(Model, cls).__new__(cls) - if kwargs.get('model') is not None: - instance._parent = kwargs.get('model') + if kwargs.get("model") is not None: + instance._parent = kwargs.get("model") elif cls.get_contexts(): instance._parent = cls.get_contexts()[-1] else: instance._parent = None - theano_config = kwargs.get('theano_config', None) - if theano_config is None or 'compute_test_value' not in theano_config: - theano_config = {'compute_test_value': 'raise'} + theano_config = kwargs.get("theano_config", None) + if theano_config is None or "compute_test_value" not in theano_config: + theano_config = {"compute_test_value": "raise"} instance._theano_config = theano_config return instance - def __init__(self, name='', model=None, theano_config=None): + def __init__(self, name="", model=None, theano_config=None): self.name = name if self.parent is not None: self.named_vars = treedict(parent=self.parent.named_vars) @@ -674,8 +708,7 @@ def isroot(self): def bijection(self): vars = inputvars(self.cont_vars) - bij = DictToArrayBijection(ArrayOrdering(vars), - self.test_point) + bij = DictToArrayBijection(ArrayOrdering(vars), self.test_point) return bij @@ -702,8 +735,9 @@ def logp_dlogp_function(self, grad_vars=None, **kwargs): else: for var in grad_vars: if var.dtype not in continuous_types: - raise ValueError("Can only compute the gradient of " - "continuous types: %s" % var) + raise ValueError( + "Can only compute the gradient of " "continuous types: %s" % var + ) varnames = [var.name for var in grad_vars] extra_vars = [var for var in self.free_RVs if var.name not in varnames] return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs) @@ -715,9 +749,9 @@ def logpt(self): factors = [var.logpt for var in self.basic_RVs] + self.potentials logp = tt.sum([tt.sum(factor) for factor in factors]) if self.name: - logp.name = '__logp_%s' % self.name + logp.name = "__logp_%s" % self.name else: - logp.name = '__logp' + logp.name = "__logp" return logp @property @@ -727,9 +761,9 @@ def logp_nojact(self): factors = [var.logp_nojact for var in self.basic_RVs] + self.potentials logp = tt.sum([tt.sum(factor) for factor in factors]) if self.name: - logp.name = '__logp_nojac_%s' % self.name + logp.name = "__logp_nojac_%s" % self.name else: - logp.name = '__logp_nojac' + logp.name = "__logp_nojac" return logp @property @@ -769,8 +803,7 @@ def unobserved_RVs(self): @property def test_point(self): """Test point used to check that the model doesn't generate errors""" - return Point(((var, var.tag.test_value) for var in self.vars), - model=self) + return Point(((var, var.tag.test_value) for var in self.vars), model=self) @property def disc_vars(self): @@ -804,27 +837,39 @@ def Var(self, name, dist, data=None, total_size=None): if data is None: if getattr(dist, "transform", None) is None: with self: - var = FreeRV(name=name, distribution=dist, - total_size=total_size, model=self) + var = FreeRV( + name=name, distribution=dist, total_size=total_size, model=self + ) self.free_RVs.append(var) else: with self: - var = TransformedRV(name=name, distribution=dist, - transform=dist.transform, - total_size=total_size, - model=self) - pm._log.debug('Applied {transform}-transform to {name}' - ' and added transformed {orig_name} to model.'.format( - transform=dist.transform.name, - name=name, - orig_name=get_transformed_name(name, dist.transform))) + var = TransformedRV( + name=name, + distribution=dist, + transform=dist.transform, + total_size=total_size, + model=self, + ) + pm._log.debug( + "Applied {transform}-transform to {name}" + " and added transformed {orig_name} to model.".format( + transform=dist.transform.name, + name=name, + orig_name=get_transformed_name(name, dist.transform), + ) + ) self.deterministics.append(var) self.add_random_variable(var) return var elif isinstance(data, dict): with self: - var = MultiObservedRV(name=name, data=data, distribution=dist, - total_size=total_size, model=self) + var = MultiObservedRV( + name=name, + data=data, + distribution=dist, + total_size=total_size, + model=self, + ) self.observed_RVs.append(var) if var.missing_values: self.free_RVs += var.missing_values @@ -833,9 +878,13 @@ def Var(self, name, dist, data=None, total_size=None): self.named_vars[v.name] = v else: with self: - var = ObservedRV(name=name, data=data, - distribution=dist, - total_size=total_size, model=self) + var = ObservedRV( + name=name, + data=data, + distribution=dist, + total_size=total_size, + model=self, + ) self.observed_RVs.append(var) if var.missing_values: self.free_RVs.append(var.missing_values) @@ -848,22 +897,21 @@ def Var(self, name, dist, data=None, total_size=None): def add_random_variable(self, var): """Add a random variable to the named variables of the model.""" if self.named_vars.tree_contains(var.name): - raise ValueError( - "Variable name {} already exists.".format(var.name)) + raise ValueError("Variable name {} already exists.".format(var.name)) self.named_vars[var.name] = var if not hasattr(self, self.name_of(var.name)): setattr(self, self.name_of(var.name), var) @property def prefix(self): - return '%s_' % self.name if self.name else '' + return "%s_" % self.name if self.name else "" def name_for(self, name): """Checks if name has prefix and adds if needed """ if self.prefix: if not name.startswith(self.prefix): - return '{}{}'.format(self.prefix, name) + return "{}{}".format(self.prefix, name) else: return name else: @@ -875,7 +923,7 @@ def name_of(self, name): if not self.prefix or not name: return name elif name.startswith(self.prefix): - return name[len(self.prefix):] + return name[len(self.prefix) :] else: return name @@ -902,11 +950,16 @@ def makefn(self, outs, mode=None, *args, **kwargs): Compiled Theano function """ with self: - return theano.function(self.vars, outs, - allow_input_downcast=True, - on_unused_input='ignore', - accept_inplace=True, - mode=mode, *args, **kwargs) + return theano.function( + self.vars, + outs, + allow_input_downcast=True, + on_unused_input="ignore", + accept_inplace=True, + mode=mode, + *args, + **kwargs + ) def fn(self, outs, mode=None, *args, **kwargs): """Compiles a Theano function which returns the values of ``outs`` @@ -994,14 +1047,16 @@ def flatten(self, vars=None, order=None, inputvar=None): if order is None: order = ArrayOrdering(vars) if inputvar is None: - inputvar = tt.vector('flat_view', dtype=theano.config.floatX) - if theano.config.compute_test_value != 'off': + inputvar = tt.vector("flat_view", dtype=theano.config.floatX) + if theano.config.compute_test_value != "off": if vars: inputvar.tag.test_value = flatten_list(vars).tag.test_value else: inputvar.tag.test_value = np.asarray([], inputvar.dtype) - replacements = {self.named_vars[name]: inputvar[slc].reshape(shape).astype(dtype) - for name, slc, shape, dtype in order.vmap} + replacements = { + self.named_vars[name]: inputvar[slc].reshape(shape).astype(dtype) + for name, slc, shape, dtype in order.vmap + } view = {vm.var: vm for vm in order.vmap} flat_view = FlatView(inputvar, replacements, view) return flat_view @@ -1024,21 +1079,28 @@ def check_test_point(self, test_point=None, round_vals=2): if test_point is None: test_point = self.test_point - return Series({RV.name:np.round(RV.logp(self.test_point), round_vals) for RV in self.basic_RVs}, - name='Log-probability of test_point') + return Series( + { + RV.name: np.round(RV.logp(self.test_point), round_vals) + for RV in self.basic_RVs + }, + name="Log-probability of test_point", + ) def _repr_latex_(self, name=None, dist=None): tex_vars = [] for rv in itertools.chain(self.unobserved_RVs, self.observed_RVs): rv_tex = rv.__latex__() if rv_tex is not None: - array_rv = rv_tex.replace(r'\sim', r'&\sim &').strip('$') + array_rv = rv_tex.replace(r"\sim", r"&\sim &").strip("$") tex_vars.append(array_rv) - return r'''$$ + return r"""$$ \begin{{array}}{{rcl}} {} \end{{array}} - $$'''.format('\\\\'.join(tex_vars)) + $$""".format( + "\\\\".join(tex_vars) + ) __latex__ = _repr_latex_ @@ -1086,15 +1148,15 @@ def Point(*args, **kwargs): args, kwargs arguments to build a dict """ - model = modelcontext(kwargs.pop('model', None)) + model = modelcontext(kwargs.pop("model", None)) args = list(args) try: d = dict(*args, **kwargs) except Exception as e: - raise TypeError( - "can't turn {} and {} into a dict. {}".format(args, kwargs, e)) - return dict((str(k), np.array(v)) for k, v in d.items() - if str(k) in map(str, model.vars)) + raise TypeError("can't turn {} and {} into a dict. {}".format(args, kwargs, e)) + return dict( + (str(k), np.array(v)) for k, v in d.items() if str(k) in map(str, model.vars) + ) class FastPointFunc(object): @@ -1119,6 +1181,7 @@ def __call__(self, *args, **kwargs): point = Point(model=self.model, *args, **kwargs) return self.f(**point) + compilef = fastfn @@ -1147,43 +1210,65 @@ def _get_scaling(total_size, shape, ndim): denom = 1 coef = floatX(total_size) / floatX(denom) elif isinstance(total_size, (list, tuple)): - if not all(isinstance(i, int) for i in total_size if (i is not Ellipsis and i is not None)): - raise TypeError('Unrecognized `total_size` type, expected ' - 'int or list of ints, got %r' % total_size) + if not all( + isinstance(i, int) + for i in total_size + if (i is not Ellipsis and i is not None) + ): + raise TypeError( + "Unrecognized `total_size` type, expected " + "int or list of ints, got %r" % total_size + ) if Ellipsis in total_size: sep = total_size.index(Ellipsis) begin = total_size[:sep] - end = total_size[sep+1:] + end = total_size[sep + 1 :] if Ellipsis in end: - raise ValueError('Double Ellipsis in `total_size` is restricted, got %r' % total_size) + raise ValueError( + "Double Ellipsis in `total_size` is restricted, got %r" % total_size + ) else: begin = total_size end = [] if (len(begin) + len(end)) > ndim: - raise ValueError('Length of `total_size` is too big, ' - 'number of scalings is bigger that ndim, got %r' % total_size) + raise ValueError( + "Length of `total_size` is too big, " + "number of scalings is bigger that ndim, got %r" % total_size + ) elif (len(begin) + len(end)) == 0: return floatX(1) if len(end) > 0: - shp_end = shape[-len(end):] + shp_end = shape[-len(end) :] else: shp_end = np.asarray([]) - shp_begin = shape[:len(begin)] - begin_coef = [floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None] + shp_begin = shape[: len(begin)] + begin_coef = [ + floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None + ] end_coef = [floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None] coefs = begin_coef + end_coef coef = tt.prod(coefs) else: - raise TypeError('Unrecognized `total_size` type, expected ' - 'int or list of ints, got %r' % total_size) + raise TypeError( + "Unrecognized `total_size` type, expected " + "int or list of ints, got %r" % total_size + ) return tt.as_tensor(floatX(coef)) class FreeRV(Factor, TensorVariable): """Unobserved random variable that a model is specified in terms of.""" - def __init__(self, type=None, owner=None, index=None, name=None, - distribution=None, total_size=None, model=None): + def __init__( + self, + type=None, + owner=None, + index=None, + name=None, + distribution=None, + total_size=None, + model=None, + ): """ Parameters ---------- @@ -1203,8 +1288,9 @@ def __init__(self, type=None, owner=None, index=None, name=None, self.dshape = tuple(distribution.shape) self.dsize = int(np.prod(distribution.shape)) self.distribution = distribution - self.tag.test_value = np.ones( - distribution.shape, distribution.dtype) * distribution.default() + self.tag.test_value = ( + np.ones(distribution.shape, distribution.dtype) * distribution.default() + ) self.logp_elemwiset = distribution.logp(self) # The logp might need scaling in minibatches. # This is done in `Factor`. @@ -1214,9 +1300,12 @@ def __init__(self, type=None, owner=None, index=None, name=None, self.model = model self.scaling = _get_scaling(total_size, self.shape, self.ndim) - incorporate_methods(source=distribution, destination=self, - methods=['random'], - wrapper=InstanceMethod) + incorporate_methods( + source=distribution, + destination=self, + methods=["random"], + wrapper=InstanceMethod, + ) def _repr_latex_(self, name=None, dist=None): if self.distribution is None: @@ -1236,12 +1325,12 @@ def init_value(self): def pandas_to_array(data): - if hasattr(data, 'values'): # pandas + if hasattr(data, "values"): # pandas if data.isnull().any().any(): # missing values ret = np.ma.MaskedArray(data.values, data.isnull().values) else: ret = data.values - elif hasattr(data, 'mask'): + elif hasattr(data, "mask"): ret = data elif isinstance(data, theano.gof.graph.Variable): ret = data @@ -1258,17 +1347,22 @@ def as_tensor(data, name, model, distribution): dtype = distribution.dtype data = pandas_to_array(data).astype(dtype) - if hasattr(data, 'mask'): + if hasattr(data, "mask"): from .distributions import NoDistribution + testval = np.broadcast_to(distribution.default(), data.shape)[data.mask] - fakedist = NoDistribution.dist(shape=data.mask.sum(), dtype=dtype, - testval=testval, parent_dist=distribution) - missing_values = FreeRV(name=name + '_missing', distribution=fakedist, - model=model) + fakedist = NoDistribution.dist( + shape=data.mask.sum(), + dtype=dtype, + testval=testval, + parent_dist=distribution, + ) + missing_values = FreeRV( + name=name + "_missing", distribution=fakedist, model=model + ) constant = tt.as_tensor_variable(data.filled()) - dataTensor = tt.set_subtensor( - constant[data.mask.nonzero()], missing_values) + dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], missing_values) dataTensor.missing_values = missing_values return dataTensor elif sps.issparse(data): @@ -1286,8 +1380,17 @@ class ObservedRV(Factor, TensorVariable): Potentially partially observed. """ - def __init__(self, type=None, owner=None, index=None, name=None, data=None, - distribution=None, total_size=None, model=None): + def __init__( + self, + type=None, + owner=None, + index=None, + name=None, + data=None, + distribution=None, + total_size=None, + model=None, + ): """ Parameters ---------- @@ -1301,7 +1404,7 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None, """ from .distributions import TensorType - if hasattr(data, 'type') and isinstance(data.type, tt.TensorType): + if hasattr(data, "type") and isinstance(data.type, tt.TensorType): type = data.type if type is None: @@ -1326,8 +1429,7 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None, self.distribution = distribution # make this RV a view on the combined missing/nonmissing array - theano.gof.Apply(theano.compile.view_op, - inputs=[data], outputs=[self]) + theano.gof.Apply(theano.compile.view_op, inputs=[data], outputs=[self]) self.tag.test_value = theano.compile.view_op(data).tag.test_value self.scaling = _get_scaling(total_size, data.shape, data.ndim) @@ -1366,11 +1468,16 @@ def __init__(self, name, data, distribution, total_size=None, model=None): needed for upscaling logp """ self.name = name - self.data = {name: as_tensor(data, name, model, distribution) - for name, data in data.items()} - - self.missing_values = [datum.missing_values for datum in self.data.values() - if datum.missing_values is not None] + self.data = { + name: as_tensor(data, name, model, distribution) + for name, data in data.items() + } + + self.missing_values = [ + datum.missing_values + for datum in self.data.values() + if datum.missing_values is not None + ] self.logp_elemwiset = distribution.logp(**self.data) # The logp might need scaling in minibatches. # This is done in `Factor`. @@ -1379,7 +1486,9 @@ def __init__(self, name, data, distribution, total_size=None, model=None): self.total_size = total_size self.model = model self.distribution = distribution - self.scaling = _get_scaling(total_size, self.logp_elemwiset.shape, self.logp_elemwiset.ndim) + self.scaling = _get_scaling( + total_size, self.logp_elemwiset.shape, self.logp_elemwiset.ndim + ) def _walk_up_rv(rv): @@ -1391,15 +1500,18 @@ def _walk_up_rv(rv): all_rvs.extend(_walk_up_rv(parent)) else: if rv.name: - all_rvs.append(r'\text{%s}' % rv.name) + all_rvs.append(r"\text{%s}" % rv.name) else: - all_rvs.append(r'\text{Constant}') + all_rvs.append(r"\text{Constant}") return all_rvs def _latex_repr_rv(rv): """Make latex string for a Deterministic variable""" - return r'$\text{%s} \sim \text{Deterministic}(%s)$' % (rv.name, r',~'.join(_walk_up_rv(rv))) + return r"$\text{%s} \sim \text{Deterministic}(%s)$" % ( + rv.name, + r",~".join(_walk_up_rv(rv)), + ) def Deterministic(name, var, model=None): @@ -1456,9 +1568,17 @@ class TransformedRV(TensorVariable): needed for upscaling logp """ - def __init__(self, type=None, owner=None, index=None, name=None, - distribution=None, model=None, transform=None, - total_size=None): + def __init__( + self, + type=None, + owner=None, + index=None, + name=None, + distribution=None, + model=None, + transform=None, + total_size=None, + ): if type is None: type = distribution.type super(TransformedRV, self).__init__(type, owner, index, name) @@ -1474,17 +1594,20 @@ def __init__(self, type=None, owner=None, index=None, name=None, transformed_name = get_transformed_name(name, transform) self.transformed = model.Var( - transformed_name, transform.apply(distribution), total_size=total_size) + transformed_name, transform.apply(distribution), total_size=total_size + ) normalRV = transform.backward(self.transformed) - theano.Apply(theano.compile.view_op, inputs=[ - normalRV], outputs=[self]) + theano.Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self]) self.tag.test_value = normalRV.tag.test_value self.scaling = _get_scaling(total_size, self.shape, self.ndim) - incorporate_methods(source=distribution, destination=self, - methods=['random'], - wrapper=InstanceMethod) + incorporate_methods( + source=distribution, + destination=self, + methods=["random"], + wrapper=InstanceMethod, + ) def _repr_latex_(self, name=None, dist=None): if self.distribution is None: diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py index e68a28cb17..269702a54d 100644 --- a/pymc3/model_graph.py +++ b/pymc3/model_graph.py @@ -14,21 +14,27 @@ def powerset(iterable): powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3) """ s = list(iterable) - return itertools.chain.from_iterable(itertools.combinations(s, r) for r in range(1, len(s)+1)) + return itertools.chain.from_iterable( + itertools.combinations(s, r) for r in range(1, len(s) + 1) + ) class ModelGraph(object): def __init__(self, model): self.model = model - self.var_names = get_default_varnames(self.model.named_vars, include_transformed=False) + self.var_names = get_default_varnames( + self.model.named_vars, include_transformed=False + ) self.var_list = self.model.named_vars.values() - self.transform_map = {v.transformed: v.name for v in self.var_list if hasattr(v, 'transformed')} + self.transform_map = { + v.transformed: v.name for v in self.var_list if hasattr(v, "transformed") + } self._deterministics = None def get_deterministics(self, var): """Compute the deterministic nodes of the graph""" deterministics = [] - attrs = ('transformed', 'logpt') + attrs = ("transformed", "logpt") for v in self.var_list: if v != var and all(not hasattr(v, attr) for attr in attrs): deterministics.append(v) @@ -36,7 +42,13 @@ def get_deterministics(self, var): def _ancestors(self, var, func, blockers=None): """Get ancestors of a function that are also named PyMC3 variables""" - return set([j for j in ancestors([func], blockers=blockers) if j in self.var_list and j != var]) + return set( + [ + j + for j in ancestors([func], blockers=blockers) + if j in self.var_list and j != var + ] + ) def _get_ancestors(self, var, func): """Get all ancestors of a function, doing some accounting for deterministics @@ -51,12 +63,14 @@ def _get_ancestors(self, var, func): # Usual case if upstream == self._ancestors(var, func, blockers=upstream): return upstream - else: # deterministic accounting + else: # deterministic accounting for d in powerset(upstream): blocked = self._ancestors(var, func, blockers=d) if set(d) == blocked: return d - raise RuntimeError('Could not traverse graph. Consider raising an issue with developers.') + raise RuntimeError( + "Could not traverse graph. Consider raising an issue with developers." + ) def _filter_parents(self, var, parents): """Get direct parents of a var, as strings""" @@ -70,14 +84,14 @@ def _filter_parents(self, var, parents): if self.transform_map[p] != var.name: keep.add(self.transform_map[p]) else: - raise AssertionError('Do not know what to do with {}'.format(str(p))) + raise AssertionError("Do not know what to do with {}".format(str(p))) return keep def get_parents(self, var): """Get the named nodes that are direct inputs to the var""" - if hasattr(var, 'transformed'): + if hasattr(var, "transformed"): func = var.transformed.logpt - elif hasattr(var, 'logpt'): + elif hasattr(var, "logpt"): func = var.logpt else: func = var @@ -99,18 +113,22 @@ def _make_node(self, var_name, graph): # styling for node attrs = {} if isinstance(v, pm.model.ObservedRV): - attrs['style'] = 'filled' + attrs["style"] = "filled" # Get name for node - if hasattr(v, 'distribution'): + if hasattr(v, "distribution"): distribution = v.distribution.__class__.__name__ else: - distribution = 'Deterministic' - attrs['shape'] = 'box' + distribution = "Deterministic" + attrs["shape"] = "box" - graph.node(var_name, - '{var_name} ~ {distribution}'.format(var_name=var_name, distribution=distribution), - **attrs) + graph.node( + var_name, + "{var_name} ~ {distribution}".format( + var_name=var_name, distribution=distribution + ), + **attrs + ) def get_plates(self): """ Rough but surprisingly accurate plate detection. @@ -125,9 +143,9 @@ def get_plates(self): plates = {} for var_name in self.var_names: v = self.model[var_name] - if hasattr(v, 'observations'): + if hasattr(v, "observations"): shape = v.observations.shape - elif hasattr(v, 'dshape'): + elif hasattr(v, "dshape"): shape = v.dshape else: shape = v.tag.test_value.shape @@ -148,19 +166,21 @@ def make_graph(self): try: import graphviz except ImportError: - raise ImportError('This function requires the python library graphviz, along with binaries. ' - 'The easiest way to install all of this is by running\n\n' - '\tconda install -c conda-forge python-graphviz') + raise ImportError( + "This function requires the python library graphviz, along with binaries. " + "The easiest way to install all of this is by running\n\n" + "\tconda install -c conda-forge python-graphviz" + ) graph = graphviz.Digraph(self.model.name) for shape, var_names in self.get_plates().items(): - label = ' x '.join(map('{:,d}'.format, shape)) + label = " x ".join(map("{:,d}".format, shape)) if label: # must be preceded by 'cluster' to get a box around it - with graph.subgraph(name='cluster' + label) as sub: + with graph.subgraph(name="cluster" + label) as sub: for var_name in var_names: self._make_node(var_name, sub) # plate label goes bottom right - sub.attr(label=label, labeljust='r', labelloc='b', style='rounded') + sub.attr(label=label, labeljust="r", labelloc="b", style="rounded") else: for var_name in var_names: self._make_node(var_name, graph) diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py index 1c4952934e..a5736da55c 100644 --- a/pymc3/parallel_sampling.py +++ b/pymc3/parallel_sampling.py @@ -11,7 +11,7 @@ from . import theanof -logger = logging.getLogger('pymc3') +logger = logging.getLogger("pymc3") # Taken from https://hg.python.org/cpython/rev/c4f92b597074 @@ -26,7 +26,7 @@ def __str__(self): class ExceptionWithTraceback: def __init__(self, exc, tb): tb = traceback.format_exception(type(exc), exc, tb) - tb = ''.join(tb) + tb = "".join(tb) self.exc = exc self.tb = '\n"""\n%s"""' % tb @@ -54,8 +54,8 @@ class _Process(multiprocessing.Process): We communicate with the main process using a pipe, and send finished samples using shared memory. """ - def __init__(self, name, msg_pipe, step_method, shared_point, - draws, tune, seed): + + def __init__(self, name, msg_pipe, step_method, shared_point, draws, tune, seed): super(_Process, self).__init__(daemon=True, name=name) self._msg_pipe = msg_pipe self._step_method = step_method @@ -75,7 +75,7 @@ def run(self): pass except BaseException as e: e = ExceptionWithTraceback(e, e.__traceback__) - self._msg_pipe.send(('error', e)) + self._msg_pipe.send(("error", e)) finally: self._msg_pipe.close() @@ -103,10 +103,10 @@ def _start_loop(self): tuning = True msg = self._recv_msg() - if msg[0] == 'abort': + if msg[0] == "abort": raise KeyboardInterrupt() - if msg[0] != 'start': - raise ValueError('Unexpected msg ' + msg[0]) + if msg[0] != "start": + raise ValueError("Unexpected msg " + msg[0]) while True: if draw < self._draws + self._tune: @@ -119,9 +119,9 @@ def _start_loop(self): tuning = False msg = self._recv_msg() - if msg[0] == 'abort': + if msg[0] == "abort": raise KeyboardInterrupt() - elif msg[0] == 'write_next': + elif msg[0] == "write_next": self._write_point(point) is_last = draw + 1 == self._draws + self._tune if is_last: @@ -129,10 +129,11 @@ def _start_loop(self): else: warns = None self._msg_pipe.send( - ('writing_done', is_last, draw, tuning, stats, warns)) + ("writing_done", is_last, draw, tuning, stats, warns) + ) draw += 1 else: - raise ValueError('Unknown message ' + msg[0]) + raise ValueError("Unknown message " + msg[0]) def _compute_point(self): if self._step_method.generates_stats: @@ -143,7 +144,7 @@ def _compute_point(self): return point, stats def _collect_warnings(self): - if hasattr(self._step_method, 'warnings'): + if hasattr(self._step_method, "warnings"): return self._step_method.warnings() else: return [] @@ -151,6 +152,7 @@ def _collect_warnings(self): class ProcessAdapter(object): """Control a Chain process from the main thread.""" + def __init__(self, draws, tune, step_method, chain, seed, start): self.chain = chain process_name = "worker_chain_%s" % chain @@ -164,9 +166,9 @@ def __init__(self, draws, tune, step_method, chain, seed, start): size *= int(dim) size *= dtype.itemsize if size != ctypes.c_size_t(size).value: - raise ValueError('Variable %s is too large' % name) + raise ValueError("Variable %s is too large" % name) - array = multiprocessing.sharedctypes.RawArray('c', size) + array = multiprocessing.sharedctypes.RawArray("c", size) self._shared_point[name] = array array_np = np.frombuffer(array, dtype).reshape(shape) array_np[...] = start[name] @@ -176,8 +178,14 @@ def __init__(self, draws, tune, step_method, chain, seed, start): self._num_samples = 0 self._process = _Process( - process_name, remote_conn, step_method, self._shared_point, - draws, tune, seed) + process_name, + remote_conn, + step_method, + self._shared_point, + draws, + tune, + seed, + ) # We fork right away, so that the main process can start tqdm threads self._process.start() @@ -191,14 +199,14 @@ def shared_point_view(self): return self._point def start(self): - self._msg_pipe.send(('start',)) + self._msg_pipe.send(("start",)) def write_next(self): self._readable = False - self._msg_pipe.send(('write_next',)) + self._msg_pipe.send(("write_next",)) def abort(self): - self._msg_pipe.send(('abort',)) + self._msg_pipe.send(("abort",)) def join(self, timeout=None): self._process.join(timeout) @@ -209,24 +217,24 @@ def terminate(self): @staticmethod def recv_draw(processes, timeout=3600): if not processes: - raise ValueError('No processes.') + raise ValueError("No processes.") pipes = [proc._msg_pipe for proc in processes] ready = multiprocessing.connection.wait(pipes) if not ready: - raise multiprocessing.TimeoutError('No message from samplers.') + raise multiprocessing.TimeoutError("No message from samplers.") idxs = {id(proc._msg_pipe): proc for proc in processes} proc = idxs[id(ready[0])] msg = ready[0].recv() - if msg[0] == 'error': + if msg[0] == "error": old = msg[1] - six.raise_from(RuntimeError('Chain %s failed.' % proc.chain), old) - elif msg[0] == 'writing_done': + six.raise_from(RuntimeError("Chain %s failed." % proc.chain), old) + elif msg[0] == "writing_done": proc._readable = True proc._num_samples += 1 return (proc,) + msg[1:] else: - raise ValueError('Sampler sent bad message.') + raise ValueError("Sampler sent bad message.") @staticmethod def terminate_all(processes, patience=2): @@ -244,8 +252,10 @@ def terminate_all(processes, patience=2): raise multiprocessing.TimeoutError() process.join(timeout) except multiprocessing.TimeoutError: - logger.warn('Chain processes did not terminate as expected. ' - 'Terminating forcefully...') + logger.warn( + "Chain processes did not terminate as expected. " + "Terminating forcefully..." + ) for process in processes: process.terminate() for process in processes: @@ -253,25 +263,35 @@ def terminate_all(processes, patience=2): Draw = namedtuple( - 'Draw', - ['chain', 'is_last', 'draw_idx', 'tuning', 'stats', 'point', 'warnings'] + "Draw", ["chain", "is_last", "draw_idx", "tuning", "stats", "point", "warnings"] ) class ParallelSampler(object): - def __init__(self, draws, tune, chains, cores, seeds, start_points, - step_method, start_chain_num=0, progressbar=True): + def __init__( + self, + draws, + tune, + chains, + cores, + seeds, + start_points, + step_method, + start_chain_num=0, + progressbar=True, + ): if progressbar: import tqdm + tqdm_ = tqdm.tqdm if any(len(arg) != chains for arg in [seeds, start_points]): - raise ValueError( - 'Number of seeds and start_points must be %s.' % chains) + raise ValueError("Number of seeds and start_points must be %s." % chains) self._samplers = [ - ProcessAdapter(draws, tune, step_method, - chain + start_chain_num, seed, start) + ProcessAdapter( + draws, tune, step_method, chain + start_chain_num, seed, start + ) for chain, seed, start in zip(range(chains), seeds, start_points) ] @@ -286,8 +306,10 @@ def __init__(self, draws, tune, chains, cores, seeds, start_points, self._progress = None if progressbar: self._progress = tqdm_( - total=chains * (draws + tune), unit='draws', - desc='Sampling %s chains' % chains) + total=chains * (draws + tune), + unit="draws", + desc="Sampling %s chains" % chains, + ) def _make_active(self): while self._inactive and len(self._active) < self._max_active: @@ -298,7 +320,7 @@ def _make_active(self): def __iter__(self): if not self._in_context: - raise ValueError('Use ParallelSampler as context manager.') + raise ValueError("Use ParallelSampler as context manager.") self._make_active() while self._active: @@ -317,8 +339,7 @@ def __iter__(self): # and only call proc.write_next() after the yield returns. # This seems to be faster overally though, as the worker # loses less time waiting. - point = {name: val.copy() - for name, val in proc.shared_point_view.items()} + point = {name: val.copy() for name, val in proc.shared_point_view.items()} # Already called for new proc in _make_active if not is_last: diff --git a/pymc3/plots/artists.py b/pymc3/plots/artists.py index 81c9dbae51..192c0328db 100644 --- a/pymc3/plots/artists.py +++ b/pymc3/plots/artists.py @@ -13,17 +13,18 @@ def _histplot_bins(column, bins=100): return range(col_min, col_max + 2, max((col_max - col_min) // bins, 1)) -def histplot_op(ax, data, alpha=.35): +def histplot_op(ax, data, alpha=0.35): """Add a histogram for each column of the data to the provided axes.""" hs = [] for column in data.T: - hs.append(ax.hist(column, bins=_histplot_bins( - column), alpha=alpha, align='left')) + hs.append( + ax.hist(column, bins=_histplot_bins(column), alpha=alpha, align="left") + ) ax.set_xlim(np.min(data) - 0.5, np.max(data) + 0.5) return hs -def kdeplot_op(ax, data, bw, prior=None, prior_alpha=1, prior_style='--'): +def kdeplot_op(ax, data, bw, prior=None, prior_alpha=1, prior_style="--"): """Get a list of density and likelihood plots, if a prior is provided.""" ls = [] pls = [] @@ -34,26 +35,41 @@ def kdeplot_op(ax, data, bw, prior=None, prior_alpha=1, prior_style='--'): x = np.linspace(l, u, len(density)) if prior is not None: p = prior.logp(x).eval() - pls.append(ax.plot(x, np.exp(p), - alpha=prior_alpha, ls=prior_style)) + pls.append(ax.plot(x, np.exp(p), alpha=prior_alpha, ls=prior_style)) ls.append(ax.plot(x, density)) except ValueError: errored.append(str(i)) if errored: - ax.text(.27, .47, 'WARNING: KDE plot failed for: ' + ','.join(errored), - bbox={'facecolor': 'red', 'alpha': 0.5, 'pad': 10}, - style='italic') + ax.text( + 0.27, + 0.47, + "WARNING: KDE plot failed for: " + ",".join(errored), + bbox={"facecolor": "red", "alpha": 0.5, "pad": 10}, + style="italic", + ) return ls, pls -def plot_posterior_op(trace_values, ax, bw, kde_plot, point_estimate, round_to, - alpha_level, ref_val, rope, text_size=16, **kwargs): +def plot_posterior_op( + trace_values, + ax, + bw, + kde_plot, + point_estimate, + round_to, + alpha_level, + ref_val, + rope, + text_size=16, + **kwargs +): """Artist to draw posterior.""" + def format_as_percent(x, round_to=0): - return '{0:.{1:d}f}%'.format(100 * x, round_to) + return "{0:.{1:d}f}%".format(100 * x, round_to) def display_ref_val(ref_val): less_than_ref_probability = (trace_values < ref_val).mean() @@ -61,80 +77,117 @@ def display_ref_val(ref_val): ref_in_posterior = "{} <{:g}< {}".format( format_as_percent(less_than_ref_probability, 1), ref_val, - format_as_percent(greater_than_ref_probability, 1)) - ax.axvline(ref_val, ymin=0.02, ymax=.75, color='g', - linewidth=4, alpha=0.65) - ax.text(trace_values.mean(), plot_height * 0.6, ref_in_posterior, - size=text_size, horizontalalignment='center') + format_as_percent(greater_than_ref_probability, 1), + ) + ax.axvline(ref_val, ymin=0.02, ymax=0.75, color="g", linewidth=4, alpha=0.65) + ax.text( + trace_values.mean(), + plot_height * 0.6, + ref_in_posterior, + size=text_size, + horizontalalignment="center", + ) def display_rope(rope): - ax.plot(rope, (plot_height * 0.02, plot_height * 0.02), - linewidth=20, color='r', alpha=0.75) - text_props = dict(size=text_size, horizontalalignment='center', color='r') + ax.plot( + rope, + (plot_height * 0.02, plot_height * 0.02), + linewidth=20, + color="r", + alpha=0.75, + ) + text_props = dict(size=text_size, horizontalalignment="center", color="r") ax.text(rope[0], plot_height * 0.14, rope[0], **text_props) ax.text(rope[1], plot_height * 0.14, rope[1], **text_props) def display_point_estimate(): if not point_estimate: return - if point_estimate not in ('mode', 'mean', 'median'): - raise ValueError( - "Point Estimate should be in ('mode','mean','median')") - if point_estimate == 'mean': + if point_estimate not in ("mode", "mean", "median"): + raise ValueError("Point Estimate should be in ('mode','mean','median')") + if point_estimate == "mean": point_value = trace_values.mean() - elif point_estimate == 'mode': + elif point_estimate == "mode": if isinstance(trace_values[0], float): density, l, u = fast_kde(trace_values, bw) x = np.linspace(l, u, len(density)) point_value = x[np.argmax(density)] else: point_value = mode(trace_values.round(round_to))[0][0] - elif point_estimate == 'median': + elif point_estimate == "median": point_value = np.median(trace_values) - point_text = '{point_estimate}={point_value:.{round_to}f}'.format(point_estimate=point_estimate, - point_value=point_value, round_to=round_to) - - ax.text(point_value, plot_height * 0.8, point_text, - size=text_size, horizontalalignment='center') + point_text = "{point_estimate}={point_value:.{round_to}f}".format( + point_estimate=point_estimate, point_value=point_value, round_to=round_to + ) + + ax.text( + point_value, + plot_height * 0.8, + point_text, + size=text_size, + horizontalalignment="center", + ) def display_hpd(): hpd_intervals = hpd(trace_values, alpha=alpha_level) - ax.plot(hpd_intervals, (plot_height * 0.02, - plot_height * 0.02), linewidth=4, color='k') - ax.text(hpd_intervals[0], plot_height * 0.07, - hpd_intervals[0].round(round_to), - size=text_size, horizontalalignment='right') - ax.text(hpd_intervals[1], plot_height * 0.07, - hpd_intervals[1].round(round_to), - size=text_size, horizontalalignment='left') - ax.text((hpd_intervals[0] + hpd_intervals[1]) / 2, plot_height * 0.2, - format_as_percent(1 - alpha_level) + ' HPD', - size=text_size, horizontalalignment='center') + ax.plot( + hpd_intervals, + (plot_height * 0.02, plot_height * 0.02), + linewidth=4, + color="k", + ) + ax.text( + hpd_intervals[0], + plot_height * 0.07, + hpd_intervals[0].round(round_to), + size=text_size, + horizontalalignment="right", + ) + ax.text( + hpd_intervals[1], + plot_height * 0.07, + hpd_intervals[1].round(round_to), + size=text_size, + horizontalalignment="left", + ) + ax.text( + (hpd_intervals[0] + hpd_intervals[1]) / 2, + plot_height * 0.2, + format_as_percent(1 - alpha_level) + " HPD", + size=text_size, + horizontalalignment="center", + ) def format_axes(): ax.yaxis.set_ticklabels([]) - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - ax.spines['left'].set_visible(False) - ax.spines['bottom'].set_visible(True) - ax.yaxis.set_ticks_position('none') - ax.xaxis.set_ticks_position('bottom') - ax.tick_params(axis='x', direction='out', width=1, length=3, - color='0.5', labelsize=text_size) - ax.spines['bottom'].set_color('0.5') + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.spines["left"].set_visible(False) + ax.spines["bottom"].set_visible(True) + ax.yaxis.set_ticks_position("none") + ax.xaxis.set_ticks_position("bottom") + ax.tick_params( + axis="x", + direction="out", + width=1, + length=3, + color="0.5", + labelsize=text_size, + ) + ax.spines["bottom"].set_color("0.5") def set_key_if_doesnt_exist(d, key, value): if key not in d: d[key] = value if kde_plot and isinstance(trace_values[0], float): - kdeplot(trace_values, alpha=kwargs.pop('alpha', 0.35), bw=bw, ax=ax, **kwargs) + kdeplot(trace_values, alpha=kwargs.pop("alpha", 0.35), bw=bw, ax=ax, **kwargs) else: - set_key_if_doesnt_exist(kwargs, 'bins', 30) - set_key_if_doesnt_exist(kwargs, 'edgecolor', 'w') - set_key_if_doesnt_exist(kwargs, 'align', 'right') - set_key_if_doesnt_exist(kwargs, 'color', '#87ceeb') + set_key_if_doesnt_exist(kwargs, "bins", 30) + set_key_if_doesnt_exist(kwargs, "edgecolor", "w") + set_key_if_doesnt_exist(kwargs, "align", "right") + set_key_if_doesnt_exist(kwargs, "color", "#87ceeb") ax.hist(trace_values, **kwargs) plot_height = ax.get_ylim()[1] @@ -147,26 +200,27 @@ def set_key_if_doesnt_exist(d, key, value): if rope is not None: display_rope(rope) + def scale_text(figsize, text_size): - """Scale text to figsize.""" + """Scale text to figsize.""" - if text_size is None and figsize is not None: - if figsize[0] <= 11: - return 12 - else: - return figsize[0] + if text_size is None and figsize is not None: + if figsize[0] <= 11: + return 12 else: - return text_size + return figsize[0] + else: + return text_size + def get_trace_dict(tr, varnames): - traces = OrderedDict() - for v in varnames: - vals = tr.get_values(v, combine=True, squeeze=True) - if vals.ndim > 1: - vals_flat = vals.reshape(vals.shape[0], -1).T - for i, vi in enumerate(vals_flat): - traces['_'.join([v, str(i)])] = vi - else: - traces[v] = vals - return traces - \ No newline at end of file + traces = OrderedDict() + for v in varnames: + vals = tr.get_values(v, combine=True, squeeze=True) + if vals.ndim > 1: + vals_flat = vals.reshape(vals.shape[0], -1).T + for i, vi in enumerate(vals_flat): + traces["_".join([v, str(i)])] = vi + else: + traces[v] = vals + return traces diff --git a/pymc3/plots/autocorrplot.py b/pymc3/plots/autocorrplot.py index 0c9d053a52..d390f0e110 100644 --- a/pymc3/plots/autocorrplot.py +++ b/pymc3/plots/autocorrplot.py @@ -9,8 +9,16 @@ from .utils import get_default_varnames, get_axis -def autocorrplot(trace, varnames=None, max_lag=100, burn=0, plot_transformed=False, - symmetric_plot=False, ax=None, figsize=None): +def autocorrplot( + trace, + varnames=None, + max_lag=100, + burn=0, + plot_transformed=False, + symmetric_plot=False, + ax=None, + figsize=None, +): """Bar plot of the autocorrelation function for a trace. Parameters @@ -39,39 +47,53 @@ def autocorrplot(trace, varnames=None, max_lag=100, burn=0, plot_transformed=Fal ------- ax : matplotlib axes """ + def _handle_array_varnames(varname): if trace[0][varname].__class__ is np.ndarray: k = trace[varname].shape[1] for i in range(k): - yield varname + '_{0}'.format(i) + yield varname + "_{0}".format(i) else: yield varname if varnames is None: varnames = get_default_varnames(trace.varnames, plot_transformed) - varnames = list(itertools.chain.from_iterable(map(_handle_array_varnames, varnames))) + varnames = list( + itertools.chain.from_iterable(map(_handle_array_varnames, varnames)) + ) nchains = trace.nchains if figsize is None: figsize = (12, len(varnames) * 2) - ax = get_axis(ax, len(varnames), nchains, - squeeze=False, sharex=True, sharey=True, figsize=figsize) + ax = get_axis( + ax, + len(varnames), + nchains, + squeeze=False, + sharex=True, + sharey=True, + figsize=figsize, + ) max_lag = min(len(trace) - 1, max_lag) for i, v in enumerate(varnames): for j, chain in enumerate(trace.chains): try: - d = np.squeeze(trace.get_values(v, chains=[chain], burn=burn, - combine=False)) + d = np.squeeze( + trace.get_values(v, chains=[chain], burn=burn, combine=False) + ) except KeyError: - k = int(v.split('_')[-1]) - v_use = '_'.join(v.split('_')[:-1]) - d = np.squeeze(trace.get_values(v_use, chains=[chain], - burn=burn, combine=False)[:, k]) + k = int(v.split("_")[-1]) + v_use = "_".join(v.split("_")[:-1]) + d = np.squeeze( + trace.get_values(v_use, chains=[chain], burn=burn, combine=False)[ + :, k + ] + ) ax[i, j].acorr(d, detrend=plt.mlab.detrend_mean, maxlags=max_lag) diff --git a/pymc3/plots/compareplot.py b/pymc3/plots/compareplot.py index d6f31fdcfd..3542c456a6 100644 --- a/pymc3/plots/compareplot.py +++ b/pymc3/plots/compareplot.py @@ -1,12 +1,14 @@ import numpy as np + try: import matplotlib.pyplot as plt except ImportError: # mpl is optional pass -def compareplot(comp_df, insample_dev=True, se=True, dse=True, ax=None, - plot_kwargs=None): +def compareplot( + comp_df, insample_dev=True, se=True, dse=True, ax=None, plot_kwargs=None +): """ Model comparison summary plot in the style of the one used in the book Statistical Rethinking by Richard McElreath. @@ -44,59 +46,68 @@ def compareplot(comp_df, insample_dev=True, se=True, dse=True, ax=None, if plot_kwargs is None: plot_kwargs = {} - yticks_pos, step = np.linspace(0, -1, (comp_df.shape[0] * 2) - 1, - retstep=True) + yticks_pos, step = np.linspace(0, -1, (comp_df.shape[0] * 2) - 1, retstep=True) yticks_pos[1::2] = yticks_pos[1::2] + step / 2 - yticks_labels = [''] * len(yticks_pos) - - ic = 'WAIC' + yticks_labels = [""] * len(yticks_pos) + + ic = "WAIC" if ic not in comp_df.columns: - ic = 'LOO' + ic = "LOO" if dse: yticks_labels[0] = comp_df.index[0] yticks_labels[2::2] = comp_df.index[1:] ax.set_yticks(yticks_pos) - ax.errorbar(x=comp_df[ic].iloc[1:], - y=yticks_pos[1::2], - xerr=comp_df.dSE[1:], - color=plot_kwargs.get('color_dse', 'grey'), - fmt=plot_kwargs.get('marker_dse', '^')) + ax.errorbar( + x=comp_df[ic].iloc[1:], + y=yticks_pos[1::2], + xerr=comp_df.dSE[1:], + color=plot_kwargs.get("color_dse", "grey"), + fmt=plot_kwargs.get("marker_dse", "^"), + ) else: yticks_labels = comp_df.index ax.set_yticks(yticks_pos[::2]) if se: - ax.errorbar(x=comp_df[ic], - y=yticks_pos[::2], - xerr=comp_df.SE, - color=plot_kwargs.get('color_ic', 'k'), - fmt=plot_kwargs.get('marker_ic', 'o'), - mfc='None', - mew=1) + ax.errorbar( + x=comp_df[ic], + y=yticks_pos[::2], + xerr=comp_df.SE, + color=plot_kwargs.get("color_ic", "k"), + fmt=plot_kwargs.get("marker_ic", "o"), + mfc="None", + mew=1, + ) else: - ax.plot(comp_df[ic], - yticks_pos[::2], - color=plot_kwargs.get('color_ic', 'k'), - marker=plot_kwargs.get('marker_ic', 'o'), - mfc='None', - mew=1, - lw=0) + ax.plot( + comp_df[ic], + yticks_pos[::2], + color=plot_kwargs.get("color_ic", "k"), + marker=plot_kwargs.get("marker_ic", "o"), + mfc="None", + mew=1, + lw=0, + ) if insample_dev: - ax.plot(comp_df[ic] - (2 * comp_df['p'+ic]), - yticks_pos[::2], - color=plot_kwargs.get('color_insample_dev', 'k'), - marker=plot_kwargs.get('marker_insample_dev', 'o'), - lw=0) - - ax.axvline(comp_df[ic].iloc[0], - ls=plot_kwargs.get('ls_min_ic', '--'), - color=plot_kwargs.get('color_ls_min_ic', 'grey')) - - ax.set_xlabel('Deviance', fontsize=plot_kwargs.get('fontsize', 14)) + ax.plot( + comp_df[ic] - (2 * comp_df["p" + ic]), + yticks_pos[::2], + color=plot_kwargs.get("color_insample_dev", "k"), + marker=plot_kwargs.get("marker_insample_dev", "o"), + lw=0, + ) + + ax.axvline( + comp_df[ic].iloc[0], + ls=plot_kwargs.get("ls_min_ic", "--"), + color=plot_kwargs.get("color_ls_min_ic", "grey"), + ) + + ax.set_xlabel("Deviance", fontsize=plot_kwargs.get("fontsize", 14)) ax.set_yticklabels(yticks_labels) ax.set_ylim(-1 + step, 0 - step) diff --git a/pymc3/plots/densityplot.py b/pymc3/plots/densityplot.py index 74d91cdee3..839880d8e7 100644 --- a/pymc3/plots/densityplot.py +++ b/pymc3/plots/densityplot.py @@ -1,4 +1,5 @@ import numpy as np + try: import matplotlib.pyplot as plt except ImportError: # mpl is optional @@ -8,9 +9,22 @@ from ..stats import hpd -def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='mean', - colors='cycle', outline=True, hpd_markers='', shade=0., bw=4.5, figsize=None, - textsize=12, plot_transformed=False, ax=None): +def densityplot( + trace, + models=None, + varnames=None, + alpha=0.05, + point_estimate="mean", + colors="cycle", + outline=True, + hpd_markers="", + shade=0.0, + bw=4.5, + figsize=None, + textsize=12, + plot_transformed=False, + ax=None, +): """ Generates KDE plots for continuous variables and histograms for discretes ones. Plots are truncated at their 100*(1-alpha)% credible intervals. Plots are grouped @@ -64,7 +78,7 @@ def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='m ax : Matplotlib axes """ - if point_estimate not in ('mean', 'median', None): + if point_estimate not in ("mean", "median", None): raise ValueError("Point estimate should be 'mean', 'median' or None") if not isinstance(trace, (list, tuple)): @@ -74,17 +88,18 @@ def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='m if models is None: if length_trace > 1: - models = ['m_{}'.format(i) for i in range(length_trace)] + models = ["m_{}".format(i) for i in range(length_trace)] else: - models = [''] + models = [""] elif len(models) != length_trace: raise ValueError( - "The number of names for the models does not match the number of models") + "The number of names for the models does not match the number of models" + ) length_models = len(models) - if colors == 'cycle': - colors = ['C{}'.format(i % 10) for i in range(length_models)] + if colors == "cycle": + colors = ["C{}".format(i % 10) for i in range(length_models)] elif isinstance(colors, str): colors = [colors for i in range(length_models)] @@ -110,12 +125,32 @@ def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='m if k > 1: vec = np.split(vec.T.ravel(), k) for i in range(k): - _d_helper(vec[i], vname, colors[t_idx], bw, alpha, point_estimate, - hpd_markers, outline, shade, dplot[v_idx]) + _d_helper( + vec[i], + vname, + colors[t_idx], + bw, + alpha, + point_estimate, + hpd_markers, + outline, + shade, + dplot[v_idx], + ) else: - _d_helper(vec, vname, colors[t_idx], bw, alpha, point_estimate, - hpd_markers, outline, shade, dplot[v_idx]) + _d_helper( + vec, + vname, + colors[t_idx], + bw, + alpha, + point_estimate, + hpd_markers, + outline, + shade, + dplot[v_idx], + ) if length_trace > 1: for m_idx, m in enumerate(models): @@ -127,7 +162,9 @@ def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='m return dplot -def _d_helper(vec, vname, c, bw, alpha, point_estimate, hpd_markers, outline, shade, ax): +def _d_helper( + vec, vname, c, bw, alpha, point_estimate, hpd_markers, outline, shade, ax +): """ vec : array 1D array from trace @@ -148,7 +185,7 @@ def _d_helper(vec, vname, c, bw, alpha, point_estimate, hpd_markers, outline, sh (opaque). Defaults to 0. ax : matplotlib axes """ - if vec.dtype.kind == 'f': + if vec.dtype.kind == "f": density, l, u = fast_kde(vec) x = np.linspace(l, u, len(density)) hpd_ = hpd(vec, alpha) @@ -161,31 +198,31 @@ def _d_helper(vec, vname, c, bw, alpha, point_estimate, hpd_markers, outline, sh if outline: ax.plot(x[cut], density[cut], color=c) - ax.plot([xmin, xmin], [-ymin/100, ymin], color=c, ls='-') - ax.plot([xmax, xmax], [-ymax/100, ymax], color=c, ls='-') + ax.plot([xmin, xmin], [-ymin / 100, ymin], color=c, ls="-") + ax.plot([xmax, xmax], [-ymax / 100, ymax], color=c, ls="-") if shade: ax.fill_between(x, density, where=cut, color=c, alpha=shade) else: xmin, xmax = hpd(vec, alpha) - bins = range(xmin, xmax+1) + bins = range(xmin, xmax + 1) if outline: - ax.hist(vec, bins=bins, color=c, histtype='step') + ax.hist(vec, bins=bins, color=c, histtype="step") ax.hist(vec, bins=bins, color=c, alpha=shade) if hpd_markers: - ax.plot(xmin, 0, hpd_markers, color=c, markeredgecolor='k') - ax.plot(xmax, 0, hpd_markers, color=c, markeredgecolor='k') + ax.plot(xmin, 0, hpd_markers, color=c, markeredgecolor="k") + ax.plot(xmax, 0, hpd_markers, color=c, markeredgecolor="k") if point_estimate is not None: - if point_estimate == 'mean': + if point_estimate == "mean": ps = np.mean(vec) - elif point_estimate == 'median': + elif point_estimate == "median": ps = np.median(vec) - ax.plot(ps, -0.001, 'o', color=c, markeredgecolor='k') + ax.plot(ps, -0.001, "o", color=c, markeredgecolor="k") ax.set_yticks([]) ax.set_title(vname) - for pos in ['left', 'right', 'top']: + for pos in ["left", "right", "top"]: ax.spines[pos].set_visible(0) diff --git a/pymc3/plots/energyplot.py b/pymc3/plots/energyplot.py index 5fbba176ce..e6a44ee52f 100644 --- a/pymc3/plots/energyplot.py +++ b/pymc3/plots/energyplot.py @@ -1,6 +1,7 @@ import warnings import numpy as np + try: import matplotlib.pyplot as plt except ImportError: # mpl is optional @@ -8,8 +9,18 @@ from .kdeplot import kdeplot -def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, shade=0.35, bw=4.5, - frame=True, kwargs_shade=None, **kwargs): +def energyplot( + trace, + kind="kde", + figsize=None, + ax=None, + legend=True, + shade=0.35, + bw=4.5, + frame=True, + kwargs_shade=None, + **kwargs +): """Plot energy transition distribution and marginal energy distribution in order to diagnose poor exploration by HMC algorithms. @@ -46,13 +57,15 @@ def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, shade=0.35 _, ax = plt.subplots(figsize=figsize) try: - energy = trace['energy'] + energy = trace["energy"] except KeyError: - warnings.warn('There is no energy information in the passed trace.') + warnings.warn("There is no energy information in the passed trace.") return ax - series = [('Marginal energy distribution', energy - energy.mean()), - ('Energy transition distribution', np.diff(energy))] + series = [ + ("Marginal energy distribution", energy - energy.mean()), + ("Energy transition distribution", np.diff(energy)), + ] if figsize is None: figsize = (8, 6) @@ -60,17 +73,24 @@ def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, shade=0.35 if kwargs_shade is None: kwargs_shade = {} - if kind == 'kde': + if kind == "kde": for label, value in series: - kdeplot(value, label=label, shade=shade, bw=bw, ax=ax, kwargs_shade=kwargs_shade, - **kwargs) - - elif kind == 'hist': + kdeplot( + value, + label=label, + shade=shade, + bw=bw, + ax=ax, + kwargs_shade=kwargs_shade, + **kwargs + ) + + elif kind == "hist": for label, value in series: ax.hist(value, alpha=shade, label=label, **kwargs) else: - raise ValueError('Plot type {} not recognized.'.format(kind)) + raise ValueError("Plot type {} not recognized.".format(kind)) ax.set_xticks([]) ax.set_yticks([]) diff --git a/pymc3/plots/forestplot.py b/pymc3/plots/forestplot.py index bf103903ff..aced43a213 100644 --- a/pymc3/plots/forestplot.py +++ b/pymc3/plots/forestplot.py @@ -8,6 +8,7 @@ from pymc3.stats import quantiles, hpd, dict2pd from .utils import identity_transform, get_default_varnames + def _var_str(name, shape): """Return a sequence of strings naming the element of the tallyable object. @@ -17,8 +18,8 @@ def _var_str(name, shape): """ size = np.prod(shape) ind = (np.indices(shape)).reshape(-1, size) - names = ['[' + ','.join(map(str, i)) + ']' for i in zip(*ind)] - names[0] = '%s %s' % (name, names[0]) + names = ["[" + ",".join(map(str, i)) + "]" for i in zip(*ind)] + names[0] = "%s %s" % (name, names[0]) return names @@ -44,31 +45,61 @@ def _plot_tree(ax, y, ntiles, show_quartiles, c, plot_kwargs): """ if show_quartiles: # Plot median - ax.plot(ntiles[2], y, color=c, - marker=plot_kwargs.get('marker', 'o'), - markersize=plot_kwargs.get('markersize', 4)) + ax.plot( + ntiles[2], + y, + color=c, + marker=plot_kwargs.get("marker", "o"), + markersize=plot_kwargs.get("markersize", 4), + ) # Plot quartile interval - ax.errorbar(x=(ntiles[1], ntiles[3]), y=(y, y), - linewidth=plot_kwargs.get('linewidth', 2), - color=c) + ax.errorbar( + x=(ntiles[1], ntiles[3]), + y=(y, y), + linewidth=plot_kwargs.get("linewidth", 2), + color=c, + ) else: # Plot median - ax.plot(ntiles[1], y, marker=plot_kwargs.get('marker', 'o'), - color=c, markersize=plot_kwargs.get('markersize', 4)) + ax.plot( + ntiles[1], + y, + marker=plot_kwargs.get("marker", "o"), + color=c, + markersize=plot_kwargs.get("markersize", 4), + ) # Plot outer interval - ax.errorbar(x=(ntiles[0], ntiles[-1]), y=(y, y), - linewidth=int(plot_kwargs.get('linewidth', 2)/2), - color=c) + ax.errorbar( + x=(ntiles[0], ntiles[-1]), + y=(y, y), + linewidth=int(plot_kwargs.get("linewidth", 2) / 2), + color=c, + ) return ax -def forestplot(trace, models=None, varnames=None, transform=identity_transform, - alpha=0.05, quartiles=True, rhat=True, main=None, xtitle=None, - xlim=None, ylabels=None, colors='C0', chain_spacing=0.1, vline=0, - gs=None, plot_transformed=False, plot_kwargs=None): +def forestplot( + trace, + models=None, + varnames=None, + transform=identity_transform, + alpha=0.05, + quartiles=True, + rhat=True, + main=None, + xtitle=None, + xlim=None, + ylabels=None, + colors="C0", + chain_spacing=0.1, + vline=0, + gs=None, + plot_transformed=False, + plot_kwargs=None, +): """ Forest plot (model summary plot). @@ -139,15 +170,16 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, if models is None: if len(trace) > 1: - models = ['m_{}'.format(i) for i in range(len(trace))] + models = ["m_{}".format(i) for i in range(len(trace))] else: - models = [''] + models = [""] elif len(models) != len(trace): - raise ValueError("The number of names for the models does not match " - "the number of models") + raise ValueError( + "The number of names for the models does not match " "the number of models" + ) - if colors == 'cycle': - colors = ['C{}'.format(i % 10) for i in range(len(models))] + if colors == "cycle": + colors = ["C{}".format(i % 10) for i in range(len(models))] elif isinstance(colors, str): colors = [colors for i in range(len(models))] @@ -177,7 +209,7 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, gr_plot.set_xticks((1.0, 1.5, 2.0), ("1", "1.5", "2+")) gr_plot.set_xlim(0.9, 2.1) gr_plot.set_yticks([]) - gr_plot.set_title('R-hat') + gr_plot.set_title("R-hat") else: gs = gridspec.GridSpec(1, 1) @@ -187,10 +219,8 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, trace_quantiles = [] hpd_intervals = [] for tr in trace: - trace_quantiles.append(quantiles(tr, qlist, transform=transform, - squeeze=False)) - hpd_intervals.append(hpd(tr, alpha, transform=transform, - squeeze=False)) + trace_quantiles.append(quantiles(tr, qlist, transform=transform, squeeze=False)) + hpd_intervals.append(hpd(tr, alpha, transform=transform, squeeze=False)) labels = [] var = 0 @@ -200,7 +230,7 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, for v_idx, v in enumerate(varnames): for h, tr in enumerate(trace): if v not in tr.varnames: - labels.append(models[h] + ' ' + v) + labels.append(models[h] + " " + v) var += 1 else: for j, chain in enumerate(tr.chains): @@ -224,17 +254,19 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, if j == 0: if k > 1: names = _var_str(v, np.shape(value)) - names[0] = models[h] + ' ' + names[0] + names[0] = models[h] + " " + names[0] labels += names else: - labels.append(models[h] + ' ' + v) + labels.append(models[h] + " " + v) # Add spacing for each chain, if more than one - offset = [0] + [(chain_spacing * ((i + 2) / 2)) * - (-1) ** i for i in range(nchains[h] - 1)] + offset = [0] + [ + (chain_spacing * ((i + 2) / 2)) * (-1) ** i + for i in range(nchains[h] - 1) + ] # Y coordinate with offset - y = - var + offset[j] + y = -var + offset[j] # Deal with multivariate nodes @@ -242,34 +274,41 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, qs = np.moveaxis(np.array(quants), 0, -1).squeeze() for q in qs.reshape(-1, len(quants)): # Multiple y values - interval_plot = _plot_tree(interval_plot, y, q, - quartiles, colors[h], - plot_kwargs) + interval_plot = _plot_tree( + interval_plot, y, q, quartiles, colors[h], plot_kwargs + ) y -= 1 else: - interval_plot = _plot_tree(interval_plot, y, quants, - quartiles, colors[h], - plot_kwargs) + interval_plot = _plot_tree( + interval_plot, y, quants, quartiles, colors[h], plot_kwargs + ) # Genenerate Gelman-Rubin plot if plot_rhat[h] and v in tr.varnames: R = gelman_rubin(tr, [v]) if k > 1: - Rval = dict2pd(R, 'rhat').values - gr_plot.plot([min(r, 2) for r in Rval], - [-(j + var) for j in range(k)], 'o', - color=colors[h], markersize=4) + Rval = dict2pd(R, "rhat").values + gr_plot.plot( + [min(r, 2) for r in Rval], + [-(j + var) for j in range(k)], + "o", + color=colors[h], + markersize=4, + ) else: - gr_plot.plot(min(R[v], 2), -var, 'o', color=colors[h], - markersize=4) + gr_plot.plot( + min(R[v], 2), -var, "o", color=colors[h], markersize=4 + ) var += k if len(trace) > 1: - interval_plot.axhspan(var_old, y - chain_spacing - 0.5, - facecolor='k', alpha=bands[v_idx]) + interval_plot.axhspan( + var_old, y - chain_spacing - 0.5, facecolor="k", alpha=bands[v_idx] + ) if np.any(plot_rhat): - gr_plot.axhspan(var_old, y - chain_spacing - 0.5, - facecolor='k', alpha=bands[v_idx]) + gr_plot.axhspan( + var_old, y - chain_spacing - 0.5, facecolor="k", alpha=bands[v_idx] + ) var_old = y - chain_spacing - 0.5 if ylabels is not None: @@ -280,19 +319,19 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, gs.update(left=left_margin, right=0.95, top=0.9, bottom=0.05) # Define range of y-axis for forestplot and R-hat - interval_plot.set_ylim(- var + 0.5, 0.5) + interval_plot.set_ylim(-var + 0.5, 0.5) if np.any(plot_rhat): - gr_plot.set_ylim(- var + 0.5, 0.5) + gr_plot.set_ylim(-var + 0.5, 0.5) plotrange = [np.min(all_quants), np.max(all_quants)] datarange = plotrange[1] - plotrange[0] - interval_plot.set_xlim(plotrange[0] - 0.05 * datarange, - plotrange[1] + 0.05 * datarange) + interval_plot.set_xlim( + plotrange[0] - 0.05 * datarange, plotrange[1] + 0.05 * datarange + ) # Add variable labels - interval_plot.set_yticks([- l for l in range(len(labels))]) - interval_plot.set_yticklabels(labels, - fontsize=plot_kwargs.get('fontsize', None)) + interval_plot.set_yticks([-l for l in range(len(labels))]) + interval_plot.set_yticklabels(labels, fontsize=plot_kwargs.get("fontsize", None)) # Add title if main is None: @@ -302,8 +341,7 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, else: plot_title = "" - interval_plot.set_title(plot_title, - fontsize=plot_kwargs.get('fontsize', None)) + interval_plot.set_title(plot_title, fontsize=plot_kwargs.get("fontsize", None)) # Add x-axis label if xtitle is not None: @@ -319,10 +357,10 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform, ticks.tick2On = False for loc, spine in interval_plot.spines.items(): - if loc in ['left', 'right']: - spine.set_color('none') # don't draw spine + if loc in ["left", "right"]: + spine.set_color("none") # don't draw spine # Reference line - interval_plot.axvline(vline, color='k', linestyle=':') + interval_plot.axvline(vline, color="k", linestyle=":") return gs diff --git a/pymc3/plots/kdeplot.py b/pymc3/plots/kdeplot.py index be8fa48c1e..ac79672d2a 100644 --- a/pymc3/plots/kdeplot.py +++ b/pymc3/plots/kdeplot.py @@ -78,7 +78,7 @@ def fast_kde(x, bw=4.5): dx = (xmax - xmin) / (nx - 1) std_x = entropy((x - xmin) / dx) * bw if ~np.isfinite(std_x): - std_x = 0. + std_x = 0.0 grid, _ = np.histogram(x, bins=nx) scotts_factor = n ** (-0.2) @@ -86,8 +86,8 @@ def fast_kde(x, bw=4.5): kernel = gaussian(kern_nx, scotts_factor * std_x) npad = min(nx, 2 * kern_nx) - grid = np.concatenate([grid[npad: 0: -1], grid, grid[nx: nx - npad: -1]]) - density = convolve(grid, kernel, mode='same')[npad: npad + nx] + grid = np.concatenate([grid[npad:0:-1], grid, grid[nx : nx - npad : -1]]) + density = convolve(grid, kernel, mode="same")[npad : npad + nx] norm_factor = n * dx * (2 * np.pi * std_x ** 2 * scotts_factor ** 2) ** 0.5 diff --git a/pymc3/plots/pairplot.py b/pymc3/plots/pairplot.py index 516e1117f2..9c5669a0ca 100644 --- a/pymc3/plots/pairplot.py +++ b/pymc3/plots/pairplot.py @@ -9,10 +9,20 @@ from .artists import get_trace_dict, scale_text -def pairplot(trace, varnames=None, figsize=None, text_size=None, - gs=None, ax=None, hexbin=False, plot_transformed=False, - divergences=False, kwargs_divergence=None, - sub_varnames=None, **kwargs): +def pairplot( + trace, + varnames=None, + figsize=None, + text_size=None, + gs=None, + ax=None, + hexbin=False, + plot_transformed=False, + divergences=False, + kwargs_divergence=None, + sub_varnames=None, + **kwargs +): """ Plot a scatter or hexbin matrix of the sampled parameters. @@ -55,8 +65,11 @@ def pairplot(trace, varnames=None, figsize=None, text_size=None, if plot_transformed: varnames_copy = list(trace.varnames) - remove = [get_untransformed_name(var) for var in trace.varnames - if is_transformed_name(var)] + remove = [ + get_untransformed_name(var) + for var in trace.varnames + if is_transformed_name(var) + ] try: [varnames_copy.remove(i) for i in remove] @@ -65,21 +78,21 @@ def pairplot(trace, varnames=None, figsize=None, text_size=None, varnames = varnames_copy trace_dict = get_trace_dict( - trace, get_default_varnames( - varnames, plot_transformed)) + trace, get_default_varnames(varnames, plot_transformed) + ) else: trace_dict = get_trace_dict( - trace, get_default_varnames( - trace.varnames, plot_transformed)) + trace, get_default_varnames(trace.varnames, plot_transformed) + ) if sub_varnames is None: varnames = list(trace_dict.keys()) else: trace_dict = get_trace_dict( - trace, get_default_varnames( - trace.varnames, True)) + trace, get_default_varnames(trace.varnames, True) + ) varnames = sub_varnames else: @@ -98,30 +111,30 @@ def pairplot(trace, varnames=None, figsize=None, text_size=None, figsize = (8 + numvars, 8 + numvars) if numvars < 2: - raise Exception( - 'Number of variables to be plotted must be 2 or greater.') + raise Exception("Number of variables to be plotted must be 2 or greater.") if numvars == 2 and ax is not None: if hexbin: - ax.hexbin(trace_dict[varnames[0]], - trace_dict[varnames[1]], mincnt=1, **kwargs) + ax.hexbin( + trace_dict[varnames[0]], trace_dict[varnames[1]], mincnt=1, **kwargs + ) else: - ax.scatter(trace_dict[varnames[0]], - trace_dict[varnames[1]], **kwargs) + ax.scatter(trace_dict[varnames[0]], trace_dict[varnames[1]], **kwargs) if divergences: try: - divergent = trace['diverging'] + divergent = trace["diverging"] except KeyError: - warnings.warn('No divergences were found.') - - diverge = (divergent == 1) - ax.scatter(trace_dict[varnames[0]][diverge], - trace_dict[varnames[1]][diverge], **kwargs_divergence) - ax.set_xlabel('{}'.format(varnames[0]), - fontsize=text_size) - ax.set_ylabel('{}'.format( - varnames[1]), fontsize=text_size) + warnings.warn("No divergences were found.") + + diverge = divergent == 1 + ax.scatter( + trace_dict[varnames[0]][diverge], + trace_dict[varnames[1]][diverge], + **kwargs_divergence + ) + ax.set_xlabel("{}".format(varnames[0]), fontsize=text_size) + ax.set_ylabel("{}".format(varnames[1]), fontsize=text_size) ax.tick_params(labelsize=text_size) if gs is None and ax is None: @@ -143,26 +156,22 @@ def pairplot(trace, varnames=None, figsize=None, text_size=None, if divergences: try: - divergent = trace['diverging'] + divergent = trace["diverging"] except KeyError: - warnings.warn('No divergences were found.') + warnings.warn("No divergences were found.") return ax - diverge = (divergent == 1) - ax.scatter(var1[diverge], - var2[diverge], - **kwargs_divergence) + diverge = divergent == 1 + ax.scatter(var1[diverge], var2[diverge], **kwargs_divergence) if j + 1 != numvars - 1: ax.set_xticks([]) else: - ax.set_xlabel('{}'.format(varnames[i]), - fontsize=text_size) + ax.set_xlabel("{}".format(varnames[i]), fontsize=text_size) if i != 0: ax.set_yticks([]) else: - ax.set_ylabel('{}'.format( - varnames[j + 1]), fontsize=text_size) + ax.set_ylabel("{}".format(varnames[j + 1]), fontsize=text_size) ax.tick_params(labelsize=text_size) diff --git a/pymc3/plots/posteriorplot.py b/pymc3/plots/posteriorplot.py index d2ae8e5151..d3da5dad6e 100644 --- a/pymc3/plots/posteriorplot.py +++ b/pymc3/plots/posteriorplot.py @@ -9,9 +9,23 @@ from .utils import identity_transform, get_default_varnames -def plot_posterior(trace, varnames=None, transform=identity_transform, figsize=None, text_size=None, - alpha_level=0.05, round_to=3, point_estimate='mean', rope=None, - ref_val=None, kde_plot=False, plot_transformed=False, bw=4.5, ax=None, **kwargs): +def plot_posterior( + trace, + varnames=None, + transform=identity_transform, + figsize=None, + text_size=None, + alpha_level=0.05, + round_to=3, + point_estimate="mean", + rope=None, + ref_val=None, + kde_plot=False, + plot_transformed=False, + bw=4.5, + ax=None, + **kwargs +): """Plot Posterior densities in style of John K. Kruschke book. Parameters @@ -83,10 +97,19 @@ def create_axes_grid(figsize, traces): if ax is None: fig, ax = plt.subplots(figsize=figsize) - - plot_posterior_op(transform(trace), ax=ax, bw=bw, kde_plot=kde_plot, - point_estimate=point_estimate, round_to=round_to, alpha_level=alpha_level, - ref_val=ref_val, rope=rope, text_size=scale_text(figsize, text_size), **kwargs) + plot_posterior_op( + transform(trace), + ax=ax, + bw=bw, + kde_plot=kde_plot, + point_estimate=point_estimate, + round_to=round_to, + alpha_level=alpha_level, + ref_val=ref_val, + rope=rope, + text_size=scale_text(figsize, text_size), + **kwargs + ) else: if varnames is None: @@ -110,10 +133,19 @@ def create_axes_grid(figsize, traces): for idx, (a, v) in enumerate(zip(np.atleast_1d(ax), trace_dict)): tr_values = transform(trace_dict[v]) - plot_posterior_op(tr_values, ax=a, bw=bw, kde_plot=kde_plot, - point_estimate=point_estimate, round_to=round_to, - alpha_level=alpha_level, ref_val=ref_val[idx], - rope=rope[idx], text_size=scale_text(figsize, text_size), **kwargs) + plot_posterior_op( + tr_values, + ax=a, + bw=bw, + kde_plot=kde_plot, + point_estimate=point_estimate, + round_to=round_to, + alpha_level=alpha_level, + ref_val=ref_val[idx], + rope=rope[idx], + text_size=scale_text(figsize, text_size), + **kwargs + ) a.set_title(v, fontsize=scale_text(figsize, text_size)) plt.tight_layout() @@ -137,21 +169,21 @@ def plot_posterior_predictive_glm(trace, eval=None, lm=None, samples=30, **kwarg Additional keyword arguments are passed to pylab.plot(). """ if lm is None: - lm = lambda x, sample: sample['Intercept'] + sample['x'] * x + lm = lambda x, sample: sample["Intercept"] + sample["x"] * x if eval is None: eval = np.linspace(0, 1, 100) # Set default plotting arguments - if 'lw' not in kwargs and 'linewidth' not in kwargs: - kwargs['lw'] = .2 - if 'c' not in kwargs and 'color' not in kwargs: - kwargs['c'] = 'k' + if "lw" not in kwargs and "linewidth" not in kwargs: + kwargs["lw"] = 0.2 + if "c" not in kwargs and "color" not in kwargs: + kwargs["c"] = "k" for rand_loc in np.random.randint(0, len(trace), samples): rand_sample = trace[rand_loc] plt.plot(eval, lm(eval, rand_sample), **kwargs) - # Make sure to not plot label multiple times - kwargs.pop('label', None) + # Make sure to not plot label multiple times + kwargs.pop("label", None) - plt.title('Posterior predictive') + plt.title("Posterior predictive") diff --git a/pymc3/plots/traceplot.py b/pymc3/plots/traceplot.py index adbd2b732c..1f3b022f51 100644 --- a/pymc3/plots/traceplot.py +++ b/pymc3/plots/traceplot.py @@ -8,10 +8,26 @@ from .utils import identity_transform, get_default_varnames, get_axis, make_2d -def traceplot(trace, varnames=None, transform=identity_transform, figsize=None, lines=None, - combined=False, plot_transformed=False, grid=False, alpha=0.35, priors=None, - prior_alpha=1, prior_style='--', bw=4.5, ax=None, live_plot=False, - skip_first=0, refresh_every=100, roll_over=1000): +def traceplot( + trace, + varnames=None, + transform=identity_transform, + figsize=None, + lines=None, + combined=False, + plot_transformed=False, + grid=False, + alpha=0.35, + priors=None, + prior_alpha=1, + prior_style="--", + bw=4.5, + ax=None, + live_plot=False, + skip_first=0, + refresh_every=100, + roll_over=1000, +): """Plot samples histograms and values. Parameters @@ -107,11 +123,13 @@ def traceplot(trace, varnames=None, transform=identity_transform, figsize=None, x0 = len(d) - roll_over + skip_first d_stream = d[-roll_over:] width = len(d_stream) - if d.dtype.kind == 'i': + if d.dtype.kind == "i": hist_objs = histplot_op(ax[i, 0], d, alpha=alpha) colors = [h[-1][0].get_facecolor() for h in hist_objs] else: - artists = kdeplot_op(ax[i, 0], d, bw, prior, prior_alpha, prior_style)[0] + artists = kdeplot_op(ax[i, 0], d, bw, prior, prior_alpha, prior_style)[ + 0 + ] colors = [a[0].get_color() for a in artists] ax[i, 0].set_title(str(v)) ax[i, 0].grid(grid) @@ -124,17 +142,18 @@ def traceplot(trace, varnames=None, transform=identity_transform, figsize=None, if lines: try: if isinstance(lines[v], (float, int)): - line_values, colors = [lines[v]], ['r'] + line_values, colors = [lines[v]], ["r"] else: line_values = np.atleast_1d(lines[v]).ravel() if len(colors) != len(line_values): - raise AssertionError("An incorrect number of lines was specified for " - "'{}'. Expected an iterable of length {} or to " - " a scalar".format(v, len(colors))) + raise AssertionError( + "An incorrect number of lines was specified for " + "'{}'. Expected an iterable of length {} or to " + " a scalar".format(v, len(colors)) + ) for c, l in zip(colors, line_values): ax[i, 0].axvline(x=l, color=c, lw=1.5, alpha=0.75) - ax[i, 1].axhline(y=l, color=c, - lw=1.5, alpha=alpha) + ax[i, 1].axhline(y=l, color=c, lw=1.5, alpha=alpha) except KeyError: pass if live_plot: diff --git a/pymc3/plots/utils.py b/pymc3/plots/utils.py index 9b96931936..5fb443b688 100644 --- a/pymc3/plots/utils.py +++ b/pymc3/plots/utils.py @@ -3,8 +3,9 @@ except ImportError: # mpl is optional pass import numpy as np + # plotting utilities can all be in this namespace -from ..util import get_default_varnames # pylint: disable=unused-import +from ..util import get_default_varnames # pylint: disable=unused-import def identity_transform(x): @@ -29,7 +30,7 @@ def get_axis(ax, default_rows, default_columns, **default_kwargs): if ax is None: _, ax = plt.subplots(*default_shape, **default_kwargs) elif ax.shape != default_shape: - raise ValueError('Subplots with shape %r required' % (default_shape,)) + raise ValueError("Subplots with shape %r required" % (default_shape,)) return ax @@ -39,5 +40,5 @@ def make_2d(a): # flatten out dimensions beyond the first n = a.shape[0] newshape = np.product(a.shape[1:]).astype(int) - a = a.reshape((n, newshape), order='F') + a = a.reshape((n, newshape), order="F") return a diff --git a/pymc3/sampling.py b/pymc3/sampling.py index ac54def005..c0da49656b 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -14,10 +14,24 @@ from .backends.ndarray import NDArray from .distributions.distribution import draw_values from .model import modelcontext, Point, all_continuous -from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, - BinaryGibbsMetropolis, CategoricalGibbsMetropolis, - Slice, CompoundStep, arraystep, smc) -from .util import update_start_vals, get_untransformed_name, is_transformed_name, get_default_varnames +from .step_methods import ( + NUTS, + HamiltonianMC, + Metropolis, + BinaryMetropolis, + BinaryGibbsMetropolis, + CategoricalGibbsMetropolis, + Slice, + CompoundStep, + arraystep, + smc, +) +from .util import ( + update_start_vals, + get_untransformed_name, + is_transformed_name, + get_default_varnames, +) from .vartypes import discrete_types from pymc3.step_methods.hmc import quadpotential from pymc3 import plots @@ -25,17 +39,32 @@ from tqdm import tqdm import sys + sys.setrecursionlimit(10000) -__all__ = ['sample', 'iter_sample', 'sample_posterior_predictive', - 'sample_posterior_predictive_w', 'init_nuts', - 'sample_prior_predictive', 'sample_ppc', 'sample_ppc_w'] +__all__ = [ + "sample", + "iter_sample", + "sample_posterior_predictive", + "sample_posterior_predictive_w", + "init_nuts", + "sample_prior_predictive", + "sample_ppc", + "sample_ppc_w", +] -STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, - BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis) +STEP_METHODS = ( + NUTS, + HamiltonianMC, + Metropolis, + BinaryMetropolis, + BinaryGibbsMetropolis, + Slice, + CategoricalGibbsMetropolis, +) -_log = logging.getLogger('pymc3') +_log = logging.getLogger("pymc3") def instantiate_steppers(model, steps, selected_steps, step_kwargs=None): @@ -76,7 +105,7 @@ def instantiate_steppers(model, steps, selected_steps, step_kwargs=None): unused_args = set(step_kwargs).difference(used_keys) if unused_args: - raise ValueError('Unused step method arguments: %s' % unused_args) + raise ValueError("Unused step method arguments: %s" % unused_args) if len(steps) == 1: steps = steps[0] @@ -84,8 +113,7 @@ def instantiate_steppers(model, steps, selected_steps, step_kwargs=None): return steps -def assign_step_methods(model, step=None, methods=STEP_METHODS, - step_kwargs=None): +def assign_step_methods(model, step=None, methods=STEP_METHODS, step_kwargs=None): """Assign model variables to appropriate step methods. Passing a specified model will auto-assign its constituent stochastic @@ -140,14 +168,15 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, if has_gradient: try: tg.grad(model.logpt, var) - except (AttributeError, - NotImplementedError, - tg.NullTypeGradError): + except (AttributeError, NotImplementedError, tg.NullTypeGradError): has_gradient = False # select the best method - selected = max(methods, key=lambda method, - var=var, has_gradient=has_gradient: - method._competence(var, has_gradient)) + selected = max( + methods, + key=lambda method, var=var, has_gradient=has_gradient: method._competence( + var, has_gradient + ), + ) selected_steps[selected].append(var) return instantiate_steppers(model, steps, selected_steps, step_kwargs) @@ -155,17 +184,23 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, def _print_step_hierarchy(s, level=0): if isinstance(s, (list, tuple)): - _log.info('>' * level + 'list') + _log.info(">" * level + "list") for i in s: - _print_step_hierarchy(i, level+1) + _print_step_hierarchy(i, level + 1) elif isinstance(s, CompoundStep): - _log.info('>' * level + 'CompoundStep') + _log.info(">" * level + "CompoundStep") for i in s.methods: - _print_step_hierarchy(i, level+1) + _print_step_hierarchy(i, level + 1) else: - varnames = ', '.join([get_untransformed_name(v.name) if is_transformed_name(v.name) - else v.name for v in s.vars]) - _log.info('>' * level + '{}: [{}]'.format(s.__class__.__name__, varnames)) + varnames = ", ".join( + [ + get_untransformed_name(v.name) + if is_transformed_name(v.name) + else v.name + for v in s.vars + ] + ) + _log.info(">" * level + "{}: [{}]".format(s.__class__.__name__, varnames)) def _cpu_count(): @@ -177,9 +212,11 @@ def _cpu_count(): """ try: import psutil + cpus = psutil.cpu_count(False) except ImportError: import multiprocessing + try: cpus = multiprocessing.cpu_count() // 2 except NotImplementedError: @@ -189,10 +226,29 @@ def _cpu_count(): return cpus -def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=None, chain_idx=0, - chains=None, cores=None, tune=500, nuts_kwargs=None, step_kwargs=None, progressbar=True, - model=None, random_seed=None, live_plot=False, discard_tuned_samples=True, - live_plot_kwargs=None, compute_convergence_checks=True, use_mmap=False, **kwargs): +def sample( + draws=500, + step=None, + init="auto", + n_init=200000, + start=None, + trace=None, + chain_idx=0, + chains=None, + cores=None, + tune=500, + nuts_kwargs=None, + step_kwargs=None, + progressbar=True, + model=None, + random_seed=None, + live_plot=False, + discard_tuned_samples=True, + live_plot_kwargs=None, + compute_convergence_checks=True, + use_mmap=False, + **kwargs +): """Draw samples from the posterior using the given step methods. Multiple step methods are supported via compound step methods. @@ -323,25 +379,29 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N if isinstance(step, pm.step_methods.smc.SMC): if step_kwargs is None: step_kwargs = {} - test_folder = mkdtemp(prefix='SMC_TEST') - trace = smc.sample_smc(draws=draws, - step=step, - progressbar=progressbar, - model=model, - random_seed=random_seed) + test_folder = mkdtemp(prefix="SMC_TEST") + trace = smc.sample_smc( + draws=draws, + step=step, + progressbar=progressbar, + model=model, + random_seed=random_seed, + ) else: if cores is None: cores = min(4, _cpu_count()) - if 'njobs' in kwargs: - cores = kwargs['njobs'] + if "njobs" in kwargs: + cores = kwargs["njobs"] warnings.warn( "The njobs argument has been deprecated. Use cores instead.", - DeprecationWarning) - if 'nchains' in kwargs: - chains = kwargs['nchains'] + DeprecationWarning, + ) + if "nchains" in kwargs: + chains = kwargs["nchains"] warnings.warn( "The nchains argument has been deprecated. Use chains instead.", - DeprecationWarning) + DeprecationWarning, + ) if chains is None: chains = max(2, cores) if isinstance(start, dict): @@ -356,12 +416,14 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N random_seed = [np.random.randint(2 ** 30) for _ in range(chains)] if not isinstance(random_seed, Iterable): raise TypeError( - 'Invalid value for `random_seed`. Must be tuple, list or int') - if 'chain' in kwargs: - chain_idx = kwargs['chain'] + "Invalid value for `random_seed`. Must be tuple, list or int" + ) + if "chain" in kwargs: + chain_idx = kwargs["chain"] warnings.warn( "The chain argument has been deprecated. Use chain_idx instead.", - DeprecationWarning) + DeprecationWarning, + ) if start is not None: for start_vals in start: @@ -380,27 +442,35 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N if nuts_kwargs is not None: if step_kwargs is not None: raise ValueError("Specify only one of step_kwargs and nuts_kwargs") - step_kwargs = {'nuts': nuts_kwargs} + step_kwargs = {"nuts": nuts_kwargs} if model.ndim == 0: - raise ValueError('The model does not contain any free variables.') + raise ValueError("The model does not contain any free variables.") if step is None and init is not None and all_continuous(model.vars): try: # By default, try to use NUTS - _log.info('Auto-assigning NUTS sampler...') + _log.info("Auto-assigning NUTS sampler...") args = step_kwargs if step_kwargs is not None else {} - args = args.get('nuts', {}) - start_, step = init_nuts(init=init, chains=chains, n_init=n_init, - model=model, random_seed=random_seed, - progressbar=progressbar, **args) + args = args.get("nuts", {}) + start_, step = init_nuts( + init=init, + chains=chains, + n_init=n_init, + model=model, + random_seed=random_seed, + progressbar=progressbar, + **args + ) if start is None: start = start_ except (AttributeError, NotImplementedError, tg.NullTypeGradError): # gradient computation failed - _log.info("Initializing NUTS failed. " - "Falling back to elementwise auto-assignment.") - _log.debug('Exception in init nuts', exec_info=True) + _log.info( + "Initializing NUTS failed. " + "Falling back to elementwise auto-assignment." + ) + _log.debug("Exception in init nuts", exec_info=True) step = assign_step_methods(model, step, step_kwargs=step_kwargs) else: step = assign_step_methods(model, step, step_kwargs=step_kwargs) @@ -412,50 +482,58 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N if isinstance(start, dict): start = [start] * chains - sample_args = {'draws': draws, - 'step': step, - 'start': start, - 'trace': trace, - 'chain': chain_idx, - 'chains': chains, - 'tune': tune, - 'progressbar': progressbar, - 'model': model, - 'random_seed': random_seed, - 'live_plot': live_plot, - 'live_plot_kwargs': live_plot_kwargs, - 'cores': cores, - 'use_mmap': use_mmap} + sample_args = { + "draws": draws, + "step": step, + "start": start, + "trace": trace, + "chain": chain_idx, + "chains": chains, + "tune": tune, + "progressbar": progressbar, + "model": model, + "random_seed": random_seed, + "live_plot": live_plot, + "live_plot_kwargs": live_plot_kwargs, + "cores": cores, + "use_mmap": use_mmap, + } sample_args.update(kwargs) - has_population_samplers = np.any([isinstance(m, arraystep.PopulationArrayStepShared) - for m in (step.methods if isinstance(step, CompoundStep) else [step])]) + has_population_samplers = np.any( + [ + isinstance(m, arraystep.PopulationArrayStepShared) + for m in (step.methods if isinstance(step, CompoundStep) else [step]) + ] + ) parallel = cores > 1 and chains > 1 and not has_population_samplers if parallel: - _log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, cores)) + _log.info( + "Multiprocess sampling ({} chains in {} jobs)".format(chains, cores) + ) _print_step_hierarchy(step) try: trace = _mp_sample(**sample_args) except pickle.PickleError: _log.warning("Could not pickle model, sampling singlethreaded.") - _log.debug('Pickling error:', exec_info=True) + _log.debug("Pickling error:", exec_info=True) parallel = False except AttributeError as e: if str(e).startswith("AttributeError: Can't pickle"): _log.warning("Could not pickle model, sampling singlethreaded.") - _log.debug('Pickling error:', exec_info=True) + _log.debug("Pickling error:", exec_info=True) parallel = False else: raise if not parallel: if has_population_samplers: - _log.info('Population sampling ({} chains)'.format(chains)) + _log.info("Population sampling ({} chains)".format(chains)) _print_step_hierarchy(step) trace = _sample_population(**sample_args) else: - _log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + _log.info("Sequential sampling ({} chains in 1 job)".format(chains)) _print_step_hierarchy(step) trace = _sample_many(**sample_args) @@ -463,8 +541,10 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N trace = trace[discard:] if compute_convergence_checks: - if draws-tune < 100: - warnings.warn("The number of samples is too small to check convergence reliably.") + if draws - tune < 100: + warnings.warn( + "The number of samples is too small to check convergence reliably." + ) else: trace.report._run_convergence_checks(trace, model) @@ -476,7 +556,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N def _check_start_shape(model, start): if not isinstance(start, dict): raise TypeError("start argument must be a dict or an array-like of dicts") - e = '' + e = "" for var in model.vars: if var.name in start.keys(): var_shape = var.shape.tag.test_value @@ -490,23 +570,28 @@ def _check_start_shape(model, start): else: # if model var has a specified shape if var_shape: - e += "\nExpected shape {} for var " \ - "'{}', got scalar {}".format( - tuple(var_shape), var.name, start[var.name] - ) + e += "\nExpected shape {} for var " "'{}', got scalar {}".format( + tuple(var_shape), var.name, start[var.name] + ) - if e != '': + if e != "": raise ValueError("Bad shape for start argument:{}".format(e)) def _sample_many(draws, chain, chains, start, random_seed, step, **kwargs): traces = [] for i in range(chains): - trace = _sample(draws=draws, chain=chain + i, start=start[i], - step=step, random_seed=random_seed[i], **kwargs) + trace = _sample( + draws=draws, + chain=chain + i, + start=start[i], + step=step, + random_seed=random_seed[i], + **kwargs + ) if trace is None: if len(traces) == 0: - raise ValueError('Sampling stopped before a sample was created.') + raise ValueError("Sampling stopped before a sample was created.") else: break elif len(trace) < draws: @@ -518,12 +603,31 @@ def _sample_many(draws, chain, chains, start, random_seed, step, **kwargs): return MultiTrace(traces) -def _sample_population(draws, chain, chains, start, random_seed, step, tune, - model, progressbar=None, parallelize=False, **kwargs): +def _sample_population( + draws, + chain, + chains, + start, + random_seed, + step, + tune, + model, + progressbar=None, + parallelize=False, + **kwargs +): # create the generator that iterates all chains in parallel chains = [chain + c for c in range(chains)] - sampling = _prepare_iter_population(draws, chains, step, start, parallelize, - tune=tune, model=model, random_seed=random_seed) + sampling = _prepare_iter_population( + draws, + chains, + step, + start, + parallelize, + tune=tune, + model=model, + random_seed=random_seed, + ) if progressbar: sampling = tqdm(sampling, total=draws) @@ -535,14 +639,24 @@ def _sample_population(draws, chain, chains, start, random_seed, step, tune, return MultiTrace(latest_traces) -def _sample(chain, progressbar, random_seed, start, draws=None, step=None, - trace=None, tune=None, model=None, live_plot=False, - live_plot_kwargs=None, **kwargs): - skip_first = kwargs.get('skip_first', 0) - refresh_every = kwargs.get('refresh_every', 100) - - sampling = _iter_sample(draws, step, start, trace, chain, - tune, model, random_seed) +def _sample( + chain, + progressbar, + random_seed, + start, + draws=None, + step=None, + trace=None, + tune=None, + model=None, + live_plot=False, + live_plot_kwargs=None, + **kwargs +): + skip_first = kwargs.get("skip_first", 0) + refresh_every = kwargs.get("refresh_every", 100) + + sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed) if progressbar: sampling = tqdm(sampling, total=draws) try: @@ -556,7 +670,9 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None, if it == skip_first: ax = plots.traceplot(trace, live_plot=False, **live_plot_kwargs) elif (it - skip_first) % refresh_every == 0 or it == draws - 1: - plots.traceplot(trace, ax=ax, live_plot=True, **live_plot_kwargs) + plots.traceplot( + trace, ax=ax, live_plot=True, **live_plot_kwargs + ) except KeyboardInterrupt: pass finally: @@ -565,8 +681,16 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None, return strace -def iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, - model=None, random_seed=None): +def iter_sample( + draws, + step, + start=None, + trace=None, + chain=0, + tune=None, + model=None, + random_seed=None, +): """Generator that returns a trace on each iteration using the given step method. Multiple step methods supported via compound step method returns the amount of time taken. @@ -600,20 +724,27 @@ def iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, for trace in iter_sample(500, step): ... """ - sampling = _iter_sample(draws, step, start, trace, chain, tune, - model, random_seed) + sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed) for i, strace in enumerate(sampling): - yield MultiTrace([strace[:i + 1]]) - - -def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, - model=None, random_seed=None): + yield MultiTrace([strace[: i + 1]]) + + +def _iter_sample( + draws, + step, + start=None, + trace=None, + chain=0, + tune=None, + model=None, + random_seed=None, +): model = modelcontext(model) draws = int(draws) if random_seed is not None: np.random.seed(random_seed) if draws < 1: - raise ValueError('Argument `draws` must be greater than 0.') + raise ValueError("Argument `draws` must be greater than 0.") if start is None: start = {} @@ -654,7 +785,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, yield strace except KeyboardInterrupt: strace.close() - if hasattr(step, 'warnings'): + if hasattr(step, "warnings"): warns = step.warnings() strace._add_warnings(warns) raise @@ -663,7 +794,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, raise else: strace.close() - if hasattr(step, 'warnings'): + if hasattr(step, "warnings"): warns = step.warnings() strace._add_warnings(warns) @@ -692,15 +823,16 @@ def __init__(self, steppers, parallelize): if parallelize and sys.version_info >= (3, 4): try: # configure a child process for each stepper - _log.info('Attempting to parallelize chains.') + _log.info("Attempting to parallelize chains.") import multiprocessing + for c, stepper in enumerate(tqdm(steppers)): slave_end, master_end = multiprocessing.Pipe() stepper_dumps = pickle.dumps(stepper, protocol=4) process = multiprocessing.Process( target=self.__class__._run_slave, args=(c, stepper_dumps, slave_end), - name='ChainWalker{}'.format(c) + name="ChainWalker{}".format(c), ) # we want the child process to exit if the parent is terminated process.daemon = True @@ -712,18 +844,23 @@ def __init__(self, steppers, parallelize): self._processes.append(process) self.is_parallelized = True except Exception: - _log.info('Population parallelization failed. ' - 'Falling back to sequential stepping of chains.') - _log.debug('Error was: ', exec_info=True) + _log.info( + "Population parallelization failed. " + "Falling back to sequential stepping of chains." + ) + _log.debug("Error was: ", exec_info=True) else: if parallelize: - warnings.warn('Population parallelization is only supported ' - 'on Python 3.4 and higher. All {} chains will ' - 'run sequentially on one process.' - .format(self.nchains)) + warnings.warn( + "Population parallelization is only supported " + "on Python 3.4 and higher. All {} chains will " + "run sequentially on one process.".format(self.nchains) + ) else: - _log.info('Chains are not parallelized. You can enable this by passing ' - 'pm.sample(parallelize=True).') + _log.info( + "Chains are not parallelized. You can enable this by passing " + "pm.sample(parallelize=True)." + ) return super(PopulationStepper, self).__init__() def __enter__(self): @@ -738,7 +875,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): for process in self._processes: process.join(timeout=3) except Exception: - _log.warning('Termination failed.') + _log.warning("Termination failed.") return @staticmethod @@ -762,7 +899,9 @@ def _run_slave(c, stepper_dumps, slave_end): # but rather a CompoundStep. PopulationArrayStepShared.population # has to be updated, therefore we identify the substeppers first. population_steppers = [] - for sm in (stepper.methods if isinstance(stepper, CompoundStep) else [stepper]): + for sm in ( + stepper.methods if isinstance(stepper, CompoundStep) else [stepper] + ): if isinstance(sm, arraystep.PopulationArrayStepShared): population_steppers.append(sm) while True: @@ -781,7 +920,7 @@ def _run_slave(c, stepper_dumps, slave_end): update = stepper.step(population[c]) slave_end.send(update) except Exception: - _log.exception('ChainWalker{}'.format(c)) + _log.exception("ChainWalker{}".format(c)) return def step(self, tune_stop, population): @@ -814,8 +953,9 @@ def step(self, tune_stop, population): return updates -def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None, - model=None, random_seed=None): +def _prepare_iter_population( + draws, chains, step, start, parallelize, tune=None, model=None, random_seed=None +): """Prepares a PopulationStepper and traces for population sampling. Returns @@ -830,7 +970,7 @@ def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None, if random_seed is not None: np.random.seed(random_seed) if draws < 1: - raise ValueError('Argument `draws` should be above 0.') + raise ValueError("Argument `draws` should be above 0.") # The initialization of traces, samplers and points must happen in the right order: # 1. traces are initialized and update_start_vals configures variable transforms @@ -862,7 +1002,7 @@ def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None, else: chainstep = copy(step) # link population samplers to the shared population state - for sm in (chainstep.methods if isinstance(step, CompoundStep) else [chainstep]): + for sm in chainstep.methods if isinstance(step, CompoundStep) else [chainstep]: if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(population, c) steppers[c] = chainstep @@ -922,7 +1062,7 @@ def _iter_population(draws, tune, popstep, steppers, traces, points): except KeyboardInterrupt: for c, strace in enumerate(traces): strace.close() - if hasattr(steppers[c], 'report'): + if hasattr(steppers[c], "report"): steppers[c].report._finalize(strace) raise except BaseException: @@ -932,7 +1072,7 @@ def _iter_population(draws, tune, popstep, steppers, traces, points): else: for c, strace in enumerate(traces): strace.close() - if hasattr(steppers[c], 'report'): + if hasattr(steppers[c], "report"): steppers[c].report._finalize(strace) @@ -948,18 +1088,30 @@ def _choose_backend(trace, chain, shortcuts=None, **kwds): shortcuts = pm.backends._shortcuts try: - backend = shortcuts[trace]['backend'] - name = shortcuts[trace]['name'] + backend = shortcuts[trace]["backend"] + name = shortcuts[trace]["name"] return backend(name, **kwds) except TypeError: return NDArray(vars=trace, **kwds) except KeyError: - raise ValueError('Argument `trace` is invalid.') - - -def _mp_sample(draws, tune, step, chains, cores, chain, random_seed, - start, progressbar, trace=None, model=None, use_mmap=False, - **kwargs): + raise ValueError("Argument `trace` is invalid.") + + +def _mp_sample( + draws, + tune, + step, + chains, + cores, + chain, + random_seed, + start, + progressbar, + trace=None, + model=None, + use_mmap=False, + **kwargs +): if sys.version_info.major >= 3: import pymc3.parallel_sampling as ps @@ -983,8 +1135,8 @@ def _mp_sample(draws, tune, step, chains, cores, chain, random_seed, traces.append(strace) sampler = ps.ParallelSampler( - draws, tune, chains, cores, random_seed, start, step, - chain, progressbar) + draws, tune, chains, cores, random_seed, start, step, chain, progressbar + ) try: with sampler: for draw in sampler: @@ -1010,9 +1162,16 @@ def _mp_sample(draws, tune, step, chains, cores, chain, random_seed, pbars = [progressbar] + [False] * (chains - 1) jobs = ( delayed(_sample)( - chain=args[0], progressbar=args[1], random_seed=args[2], - start=args[3], draws=draws, step=step, trace=trace, - tune=tune, model=model, **kwargs + chain=args[0], + progressbar=args[1], + random_seed=args[2], + start=args[3], + draws=draws, + step=step, + trace=trace, + tune=tune, + model=model, + **kwargs ) for args in zip(chain_nums, pbars, random_seed, start) ) @@ -1032,7 +1191,7 @@ def _choose_chains(traces, tune): lengths = [max(0, len(trace) - tune) for trace in traces] if not sum(lengths): - raise ValueError('Not enough samples to build a trace.') + raise ValueError("Not enough samples to build a trace.") idxs = np.argsort(lengths)[::-1] l_sort = np.array(lengths)[idxs] @@ -1059,8 +1218,15 @@ def stop_tuning(step): return step -def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size=None, - random_seed=None, progressbar=True): +def sample_posterior_predictive( + trace, + samples=None, + model=None, + vars=None, + size=None, + random_seed=None, + progressbar=True, +): """Generate posterior predictive samples from a model given a trace. Parameters @@ -1116,8 +1282,9 @@ def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size varnames = [var.name for var in vars] # draw once to inspect the shape - var_values = list(zip(varnames, - draw_values(vars, point=model.test_point, size=size))) + var_values = list( + zip(varnames, draw_values(vars, point=model.test_point, size=size)) + ) ppc_trace = defaultdict(list) for varname, value in var_values: ppc_trace[varname] = np.zeros((samples,) + value.shape, value.dtype) @@ -1146,13 +1313,14 @@ def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size def sample_ppc(*args, **kwargs): """This method is deprecated. Please use :func:`~sampling.sample_posterior_predictive`""" - message = 'sample_ppc() is deprecated. Please use sample_posterior_predictive()' + message = "sample_ppc() is deprecated. Please use sample_posterior_predictive()" warnings.warn(message, DeprecationWarning, stacklevel=2) return sample_posterior_predictive(*args, **kwargs) -def sample_posterior_predictive_w(traces, samples=None, models=None, weights=None, - random_seed=None, progressbar=True): +def sample_posterior_predictive_w( + traces, samples=None, models=None, weights=None, random_seed=None, progressbar=True +): """Generate weighted posterior predictive samples from a list of models and a list of traces according to a set of weights. @@ -1194,22 +1362,21 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non weights = [1] * len(traces) if len(traces) != len(weights): - raise ValueError('The number of traces and weights should be the same') + raise ValueError("The number of traces and weights should be the same") if len(models) != len(weights): - raise ValueError('The number of models and weights should be the same') + raise ValueError("The number of models and weights should be the same") length_morv = len(models[0].observed_RVs) if not all(len(i.observed_RVs) == length_morv for i in models): - raise ValueError( - 'The number of observed RVs should be the same for all models') + raise ValueError("The number of observed RVs should be the same for all models") weights = np.asarray(weights) p = weights / np.sum(weights) min_tr = min([len(i) * i.nchains for i in traces]) - n = (min_tr * p).astype('int') + n = (min_tr * p).astype("int") # ensure n sum up to min_tr idx = np.argmax(n) n[idx] = n[idx] + min_tr - np.sum(n) @@ -1239,7 +1406,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non if len(lengths) == 1: size = [None for i in variables] elif len(lengths) > 2: - raise ValueError('Observed variables could not be broadcast together') + raise ValueError("Observed variables could not be broadcast together") else: size = [] x = np.zeros(shape=lengths[0]) @@ -1268,10 +1435,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non var = variables[idx] # TODO sample_posterior_predictive_w is currently only work for model with # one observed. - ppc[var.name].append(draw_values([var], - point=param, - size=size[idx] - )[0]) + ppc[var.name].append(draw_values([var], point=param, size=size[idx])[0]) except KeyboardInterrupt: pass @@ -1285,7 +1449,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non def sample_ppc_w(*args, **kwargs): """This method is deprecated. Please use :func:`~sampling.sample_posterior_predictive_w`""" - message = 'sample_ppc() is deprecated. Please use sample_posterior_predictive_w()' + message = "sample_ppc() is deprecated. Please use sample_posterior_predictive_w()" warnings.warn(message, DeprecationWarning, stacklevel=2) return sample_posterior_predictive_w(*args, **kwargs) @@ -1331,12 +1495,21 @@ def sample_prior_predictive(samples=500, model=None, vars=None, random_seed=None elif is_transformed_name(var_name): untransformed = get_untransformed_name(var_name) if untransformed in data: - prior[var_name] = model[untransformed].transformation.forward_val(data[untransformed]) + prior[var_name] = model[untransformed].transformation.forward_val( + data[untransformed] + ) return prior -def init_nuts(init='auto', chains=1, n_init=500000, model=None, - random_seed=None, progressbar=True, **kwargs): +def init_nuts( + init="auto", + chains=1, + n_init=500000, + model=None, + random_seed=None, + progressbar=True, + **kwargs +): """Set up the mass matrix initialization for NUTS. NUTS convergence and sampling speed is extremely dependent on the @@ -1386,42 +1559,40 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, """ model = modelcontext(model) - vars = kwargs.get('vars', model.vars) + vars = kwargs.get("vars", model.vars) if set(vars) != set(model.vars): - raise ValueError('Must use init_nuts on all variables of a model.') + raise ValueError("Must use init_nuts on all variables of a model.") if not all_continuous(vars): - raise ValueError('init_nuts can only be used for models with only ' - 'continuous variables.') + raise ValueError( + "init_nuts can only be used for models with only " "continuous variables." + ) if not isinstance(init, str): - raise TypeError('init must be a string.') + raise TypeError("init must be a string.") if init is not None: init = init.lower() - if init == 'auto': - init = 'jitter+adapt_diag' + if init == "auto": + init = "jitter+adapt_diag" - _log.info('Initializing NUTS using {}...'.format(init)) + _log.info("Initializing NUTS using {}...".format(init)) if random_seed is not None: random_seed = int(np.atleast_1d(random_seed)[0]) np.random.seed(random_seed) cb = [ - pm.callbacks.CheckParametersConvergence( - tolerance=1e-2, diff='absolute'), - pm.callbacks.CheckParametersConvergence( - tolerance=1e-2, diff='relative'), + pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="absolute"), + pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"), ] - if init == 'adapt_diag': + if init == "adapt_diag": start = [model.test_point] * chains mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0) var = np.ones_like(mean) - potential = quadpotential.QuadPotentialDiagAdapt( - model.ndim, mean, var, 10) - elif init == 'jitter+adapt_diag': + potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, var, 10) + elif init == "jitter+adapt_diag": start = [] for _ in range(chains): mean = {var: val.copy() for var, val in model.test_point.items()} @@ -1430,12 +1601,13 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, start.append(mean) mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0) var = np.ones_like(mean) - potential = quadpotential.QuadPotentialDiagAdapt( - model.ndim, mean, var, 10) - elif init == 'advi+adapt_diag_grad': + potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, var, 10) + elif init == "advi+adapt_diag_grad": approx = pm.fit( random_seed=random_seed, - n=n_init, method='advi', model=model, + n=n_init, + method="advi", + model=model, callbacks=cb, progressbar=progressbar, obj_optimizer=pm.adagrad_window, @@ -1448,11 +1620,14 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, mean = model.dict_to_array(mean) weight = 50 potential = quadpotential.QuadPotentialDiagAdaptGrad( - model.ndim, mean, cov, weight) - elif init == 'advi+adapt_diag': + model.ndim, mean, cov, weight + ) + elif init == "advi+adapt_diag": approx = pm.fit( random_seed=random_seed, - n=n_init, method='advi', model=model, + n=n_init, + method="advi", + model=model, callbacks=cb, progressbar=progressbar, obj_optimizer=pm.adagrad_window, @@ -1464,51 +1639,52 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, mean = approx.bij.rmap(approx.mean.get_value()) mean = model.dict_to_array(mean) weight = 50 - potential = quadpotential.QuadPotentialDiagAdapt( - model.ndim, mean, cov, weight) - elif init == 'advi': + potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, cov, weight) + elif init == "advi": approx = pm.fit( random_seed=random_seed, - n=n_init, method='advi', model=model, + n=n_init, + method="advi", + model=model, callbacks=cb, progressbar=progressbar, - obj_optimizer=pm.adagrad_window + obj_optimizer=pm.adagrad_window, ) # type: pm.MeanField start = approx.sample(draws=chains) start = list(start) stds = approx.bij.rmap(approx.std.eval()) cov = model.dict_to_array(stds) ** 2 potential = quadpotential.QuadPotentialDiag(cov) - elif init == 'advi_map': + elif init == "advi_map": start = pm.find_MAP(include_transformed=True) approx = pm.MeanField(model=model, start=start) pm.fit( random_seed=random_seed, - n=n_init, method=pm.KLqp(approx), + n=n_init, + method=pm.KLqp(approx), callbacks=cb, progressbar=progressbar, - obj_optimizer=pm.adagrad_window + obj_optimizer=pm.adagrad_window, ) start = approx.sample(draws=chains) start = list(start) stds = approx.bij.rmap(approx.std.eval()) cov = model.dict_to_array(stds) ** 2 potential = quadpotential.QuadPotentialDiag(cov) - elif init == 'map': + elif init == "map": start = pm.find_MAP(include_transformed=True) cov = pm.find_hessian(point=start) start = [start] * chains potential = quadpotential.QuadPotentialFull(cov) - elif init == 'nuts': - init_trace = pm.sample(draws=n_init, step=pm.NUTS(), - tune=n_init // 2, - random_seed=random_seed) + elif init == "nuts": + init_trace = pm.sample( + draws=n_init, step=pm.NUTS(), tune=n_init // 2, random_seed=random_seed + ) cov = np.atleast_1d(pm.trace_cov(init_trace)) start = list(np.random.choice(init_trace, chains)) potential = quadpotential.QuadPotentialFull(cov) else: - raise ValueError( - 'Unknown initializer: {}.'.format(init)) + raise ValueError("Unknown initializer: {}.".format(init)) step = pm.NUTS(potential=potential, model=model, **kwargs) diff --git a/pymc3/stats.py b/pymc3/stats.py index 445b54b0c7..cb20348d76 100644 --- a/pymc3/stats.py +++ b/pymc3/stats.py @@ -16,14 +16,25 @@ import pymc3 as pm from pymc3.theanof import floatX -if pkg_resources.get_distribution('scipy').version < '1.0.0': +if pkg_resources.get_distribution("scipy").version < "1.0.0": from scipy.misc import logsumexp else: from scipy.special import logsumexp -__all__ = ['autocorr', 'autocov', 'waic', 'loo', 'hpd', 'quantiles', - 'mc_error', 'summary', 'compare', 'bfmi', 'r2_score'] +__all__ = [ + "autocorr", + "autocov", + "waic", + "loo", + "hpd", + "quantiles", + "mc_error", + "summary", + "compare", + "bfmi", + "r2_score", +] def statfunc(f): @@ -34,23 +45,23 @@ def statfunc(f): def wrapped_f(pymc3_obj, *args, **kwargs): try: - vars = kwargs.pop('vars', pymc3_obj.varnames) - chains = kwargs.pop('chains', pymc3_obj.chains) + vars = kwargs.pop("vars", pymc3_obj.varnames) + chains = kwargs.pop("chains", pymc3_obj.chains) except AttributeError: # If fails, assume that raw data was passed. return f(pymc3_obj, *args, **kwargs) - burn = kwargs.pop('burn', 0) - thin = kwargs.pop('thin', 1) - combine = kwargs.pop('combine', False) + burn = kwargs.pop("burn", 0) + thin = kwargs.pop("thin", 1) + combine = kwargs.pop("combine", False) # Remove outer level chain keys if only one chain) - squeeze = kwargs.pop('squeeze', True) + squeeze = kwargs.pop("squeeze", True) results = {chain: {} for chain in chains} for var in vars: - samples = pymc3_obj.get_values(var, chains=chains, burn=burn, - thin=thin, combine=combine, - squeeze=False) + samples = pymc3_obj.get_values( + var, chains=chains, burn=burn, thin=thin, combine=combine, squeeze=False + ) for chain, data in zip(chains, samples): results[chain][var] = f(np.squeeze(data), *args, **kwargs) @@ -82,7 +93,7 @@ def autocorr(x, lag=None): y = x - x.mean() n = len(y) result = fftconvolve(y, y[::-1]) - acorr = result[len(result) // 2:] + acorr = result[len(result) // 2 :] acorr /= np.arange(n, 0, -1) acorr /= acorr[0] if lag is None: @@ -91,7 +102,8 @@ def autocorr(x, lag=None): warnings.warn( "The `lag` argument has been deprecated. If you want to get " "the value of a specific lag please call `autocorr(x)[lag]`.", - DeprecationWarning) + DeprecationWarning, + ) return acorr[lag] @@ -117,7 +129,8 @@ def autocov(x, lag=None): warnings.warn( "The `lag` argument has been deprecated. If you want to get " "the value of a specific lag please call `autocov(x)[lag]`.", - DeprecationWarning) + DeprecationWarning, + ) return acov[lag] @@ -144,7 +157,7 @@ def _log_post_trace(trace, model=None, progressbar=False): def logp_vals_point(pt): if len(model.observed_RVs) == 0: - return floatX(np.array([], dtype='d')) + return floatX(np.array([], dtype="d")) logp_vals = [] for var, logp in cached: @@ -203,20 +216,22 @@ def waic(trace, model=None, pointwise=False, progressbar=False): log_py = _log_post_trace(trace, model, progressbar=progressbar) if log_py.size == 0: - raise ValueError('The model does not contain observed values.') + raise ValueError("The model does not contain observed values.") lppd_i = logsumexp(log_py, axis=0, b=1.0 / log_py.shape[0]) vars_lpd = np.var(log_py, axis=0) warn_mg = 0 if np.any(vars_lpd > 0.4): - warnings.warn("""For one or more samples the posterior variance of the + warnings.warn( + """For one or more samples the posterior variance of the log predictive densities exceeds 0.4. This could be indication of WAIC starting to fail see http://arxiv.org/abs/1507.04544 for details - """) + """ + ) warn_mg = 1 - waic_i = - 2 * (lppd_i - vars_lpd) + waic_i = -2 * (lppd_i - vars_lpd) waic_se = np.sqrt(len(waic_i) * np.var(waic_i)) @@ -226,14 +241,16 @@ def waic(trace, model=None, pointwise=False, progressbar=False): if pointwise: if np.equal(waic, waic_i).all(): - warnings.warn("""The point-wise WAIC is the same with the sum WAIC, + warnings.warn( + """The point-wise WAIC is the same with the sum WAIC, please double check the Observed RV in your model to make sure it returns element-wise logp. - """) - WAIC_r = namedtuple('WAIC_r', 'WAIC, WAIC_se, p_WAIC, var_warn, WAIC_i') + """ + ) + WAIC_r = namedtuple("WAIC_r", "WAIC, WAIC_se, p_WAIC, var_warn, WAIC_i") return WAIC_r(waic, waic_se, p_waic, warn_mg, waic_i) else: - WAIC_r = namedtuple('WAIC_r', 'WAIC, WAIC_se, p_WAIC, var_warn') + WAIC_r = namedtuple("WAIC_r", "WAIC, WAIC_se, p_WAIC, var_warn") return WAIC_r(waic, waic_se, p_waic, warn_mg) @@ -273,46 +290,50 @@ def loo(trace, model=None, pointwise=False, reff=None, progressbar=False): if reff is None: if trace.nchains == 1: - reff = 1. + reff = 1.0 else: eff = pm.effective_n(trace) - eff_ave = pm.stats.dict2pd(eff, 'eff').mean() + eff_ave = pm.stats.dict2pd(eff, "eff").mean() samples = len(trace) * trace.nchains reff = eff_ave / samples log_py = _log_post_trace(trace, model, progressbar=progressbar) if log_py.size == 0: - raise ValueError('The model does not contain observed values.') + raise ValueError("The model does not contain observed values.") lw, ks = _psislw(-log_py, reff) lw += log_py warn_mg = 0 if np.any(ks > 0.7): - warnings.warn("""Estimated shape parameter of Pareto distribution is + warnings.warn( + """Estimated shape parameter of Pareto distribution is greater than 0.7 for one or more samples. You should consider using a more robust model, this is because importance sampling is less likely to work well if the marginal posterior and LOO posterior are very different. This is more likely to - happen with a non-robust model and highly influential observations.""") + happen with a non-robust model and highly influential observations.""" + ) warn_mg = 1 - loo_lppd_i = - 2 * logsumexp(lw, axis=0) + loo_lppd_i = -2 * logsumexp(lw, axis=0) loo_lppd = loo_lppd_i.sum() loo_lppd_se = (len(loo_lppd_i) * np.var(loo_lppd_i)) ** 0.5 - lppd = np.sum(logsumexp(log_py, axis=0, b=1. / log_py.shape[0])) + lppd = np.sum(logsumexp(log_py, axis=0, b=1.0 / log_py.shape[0])) p_loo = lppd + (0.5 * loo_lppd) if pointwise: if np.equal(loo_lppd, loo_lppd_i).all(): - warnings.warn("""The point-wise LOO is the same with the sum LOO, + warnings.warn( + """The point-wise LOO is the same with the sum LOO, please double check the Observed RV in your model to make sure it returns element-wise logp. - """) - LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, shape_warn, LOO_i') + """ + ) + LOO_r = namedtuple("LOO_r", "LOO, LOO_se, p_LOO, shape_warn, LOO_i") return LOO_r(loo_lppd, loo_lppd_se, p_loo, warn_mg, loo_lppd_i) else: - LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, shape_warn') + LOO_r = namedtuple("LOO_r", "LOO, LOO_se, p_LOO, shape_warn") return LOO_r(loo_lppd, loo_lppd_se, p_loo, warn_mg) @@ -335,13 +356,13 @@ def _psislw(lw, reff): """ n, m = lw.shape - lw_out = np.copy(lw, order='F') + lw_out = np.copy(lw, order="F") kss = np.empty(m) # precalculate constants - cutoff_ind = - int(np.ceil(min(n / 5., 3 * (n / reff) ** 0.5))) - 1 + cutoff_ind = -int(np.ceil(min(n / 5.0, 3 * (n / reff) ** 0.5))) - 1 cutoffmin = np.log(np.finfo(float).tiny) - k_min = 1. / 3 + k_min = 1.0 / 3 # loop over sets of log weights for i, x in enumerate(lw_out.T): @@ -405,10 +426,10 @@ def _gpdfit(x): prior_bs = 3 prior_k = 10 n = len(x) - m = 30 + int(n**0.5) + m = 30 + int(n ** 0.5) bs = 1 - np.sqrt(m / (np.arange(1, m + 1, dtype=float) - 0.5)) - bs /= prior_bs * x[int(n/4 + 0.5) - 1] + bs /= prior_bs * x[int(n / 4 + 0.5) - 1] bs += 1 / x[-1] ks = np.log1p(-bs[:, None] * x).mean(axis=1) @@ -426,10 +447,10 @@ def _gpdfit(x): # posterior mean for b b = np.sum(bs * w) # estimate for k - k = np.log1p(- b * x).mean() + k = np.log1p(-b * x).mean() # add prior for k k = (n * k + prior_k * 0.5) / (n + prior_k) - sigma = - k / b + sigma = -k / b return k, sigma @@ -442,13 +463,13 @@ def _gpinv(p, k, sigma): ok = (p > 0) & (p < 1) if np.all(ok): if np.abs(k) < np.finfo(float).eps: - x = - np.log1p(-p) + x = -np.log1p(-p) else: x = np.expm1(-k * np.log1p(-p)) / k x *= sigma else: if np.abs(k) < np.finfo(float).eps: - x[ok] = - np.log1p(-p[ok]) + x[ok] = -np.log1p(-p[ok]) else: x[ok] = np.expm1(-k * np.log1p(-p[ok])) / k x *= sigma @@ -456,14 +477,21 @@ def _gpinv(p, k, sigma): if k >= 0: x[p == 1] = np.inf else: - x[p == 1] = - sigma / k + x[p == 1] = -sigma / k return x -def compare(model_dict, ic='WAIC', method='stacking', b_samples=1000, - alpha=1, seed=None, round_to=2): - R"""Compare models based on the widely available information criterion (WAIC) +def compare( + model_dict, + ic="WAIC", + method="stacking", + b_samples=1000, + alpha=1, + seed=None, + round_to=2, +): + r"""Compare models based on the widely available information criterion (WAIC) or leave-one-out (LOO) cross-validation. Read more theory here - in a paper by some of the leading authorities on model selection - dx.doi.org/10.1111/1467-9868.00353 @@ -528,29 +556,34 @@ def compare(model_dict, ic='WAIC', method='stacking', b_samples=1000, if not names: names = np.arange(len(model_dict)) - if ic == 'WAIC': + if ic == "WAIC": ic_func = waic - df_comp = pd.DataFrame(index=names, - columns=['WAIC', 'pWAIC', 'dWAIC', 'weight', - 'SE', 'dSE', 'var_warn']) + df_comp = pd.DataFrame( + index=names, + columns=["WAIC", "pWAIC", "dWAIC", "weight", "SE", "dSE", "var_warn"], + ) - elif ic == 'LOO': + elif ic == "LOO": ic_func = loo - df_comp = pd.DataFrame(index=names, - columns=['LOO', 'pLOO', 'dLOO', 'weight', - 'SE', 'dSE', 'shape_warn']) + df_comp = pd.DataFrame( + index=names, + columns=["LOO", "pLOO", "dLOO", "weight", "SE", "dSE", "shape_warn"], + ) else: raise NotImplementedError( - 'The information criterion {} is not supported.'.format(ic)) + "The information criterion {} is not supported.".format(ic) + ) if len(set([len(m.observed_RVs) for m in model_dict])) != 1: raise ValueError( - 'The number of observed RVs should be the same across all models') + "The number of observed RVs should be the same across all models" + ) - if method not in ['stacking', 'BB-pseudo-BMA', 'pseudo-BMA']: - raise ValueError('The method {}, to compute weights,' - 'is not supported.'.format(method)) + if method not in ["stacking", "BB-pseudo-BMA", "pseudo-BMA"]: + raise ValueError( + "The method {}, to compute weights," "is not supported.".format(method) + ) ics = [] for n, (m, t) in zip(names, model_dict.items()): @@ -558,17 +591,17 @@ def compare(model_dict, ic='WAIC', method='stacking', b_samples=1000, ics.sort(key=lambda x: x[1][0]) - if method == 'stacking': + if method == "stacking": N, K, ic_i = _ic_matrix(ics) exp_ic_i = np.exp(-0.5 * ic_i) Km = K - 1 def w_fuller(w): - return np.concatenate((w, [max(1. - np.sum(w), 0.)])) + return np.concatenate((w, [max(1.0 - np.sum(w), 0.0)])) def log_score(w): w_full = w_fuller(w) - score = 0. + score = 0.0 for i in range(N): score += np.log(np.dot(exp_ic_i[i], w_full)) return -score @@ -578,30 +611,36 @@ def gradient(w): grad = np.zeros(Km) for k in range(Km): for i in range(N): - grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, Km]) / \ - np.dot(exp_ic_i[i], w_full) + grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, Km]) / np.dot( + exp_ic_i[i], w_full + ) return -grad - theta = np.full(Km, 1. / K) - bounds = [(0., 1.) for i in range(Km)] - constraints = [{'type': 'ineq', 'fun': lambda x: -np.sum(x) + 1.}, - {'type': 'ineq', 'fun': lambda x: np.sum(x)}] - - w = minimize(fun=log_score, - x0=theta, - jac=gradient, - bounds=bounds, - constraints=constraints) - - weights = w_fuller(w['x']) + theta = np.full(Km, 1.0 / K) + bounds = [(0.0, 1.0) for i in range(Km)] + constraints = [ + {"type": "ineq", "fun": lambda x: -np.sum(x) + 1.0}, + {"type": "ineq", "fun": lambda x: np.sum(x)}, + ] + + w = minimize( + fun=log_score, + x0=theta, + jac=gradient, + bounds=bounds, + constraints=constraints, + ) + + weights = w_fuller(w["x"]) ses = [res[1] for _, res in ics] - elif method == 'BB-pseudo-BMA': + elif method == "BB-pseudo-BMA": N, K, ic_i = _ic_matrix(ics) ic_i = ic_i * N - b_weighting = dirichlet.rvs(alpha=[alpha] * N, size=b_samples, - random_state=seed) + b_weighting = dirichlet.rvs( + alpha=[alpha] * N, size=b_samples, random_state=seed + ) weights = np.zeros((b_samples, K)) z_bs = np.zeros_like(weights) for i in range(b_samples): @@ -613,7 +652,7 @@ def gradient(w): weights = weights.mean(0) ses = z_bs.std(0) - elif method == 'pseudo-BMA': + elif method == "pseudo-BMA": min_ic = ics[0][1][0] Z = np.sum([np.exp(-0.5 * (x[1][0] - min_ic)) for x in ics]) weights = [] @@ -629,13 +668,15 @@ def gradient(w): d_se = np.sqrt(len(diff) * np.var(diff)) se = ses[i] weight = weights[i] - df_comp.at[idx] = (round(res[0], round_to), - round(res[2], round_to), - round(d_ic, round_to), - round(weight, round_to), - round(se, round_to), - round(d_se, round_to), - res[3]) + df_comp.at[idx] = ( + round(res[0], round_to), + round(res[2], round_to), + round(d_ic, round_to), + round(weight, round_to), + round(se, round_to), + round(d_se, round_to), + res[3], + ) return df_comp.sort_values(by=ic) @@ -651,13 +692,15 @@ def _ic_matrix(ics): for i in range(K): ic = ics[i][1][4] if len(ic) != N: - raise ValueError('The number of observations should be the same ' - 'across all models') + raise ValueError( + "The number of observations should be the same " "across all models" + ) else: ic_i[:, i] = ic return N, K, ic_i + def make_indices(dimensions): # Generates complete set of indices for given dimensions level = len(dimensions) @@ -690,7 +733,7 @@ def calc_min_interval(x, alpha): interval_width = x[interval_idx_inc:] - x[:n_intervals] if len(interval_width) == 0: - raise ValueError('Too few elements for interval calculation') + raise ValueError("Too few elements for interval calculation") min_idx = np.argmin(interval_width) hdi_min = x[min_idx] @@ -752,14 +795,16 @@ def hpd(x, alpha=0.05, transform=lambda x: x): def _hpd_df(x, alpha): - cnames = ['hpd_{0:g}'.format(100 * alpha / 2), - 'hpd_{0:g}'.format(100 * (1 - alpha / 2))] + cnames = [ + "hpd_{0:g}".format(100 * alpha / 2), + "hpd_{0:g}".format(100 * (1 - alpha / 2)), + ] return pd.DataFrame(hpd(x, alpha), columns=cnames) @statfunc def mc_error(x, batches=5): - R"""Calculates the simulation standard error, accounting for non-independent + r"""Calculates the simulation standard error, accounting for non-independent samples. The trace is divided into batches, and the standard deviation of the batch means is calculated. @@ -777,7 +822,7 @@ def mc_error(x, batches=5): if x.ndim > 1: dims = np.shape(x) - #ttrace = np.transpose(np.reshape(trace, (dims[0], sum(dims[1:])))) + # ttrace = np.transpose(np.reshape(trace, (dims[0], sum(dims[1:])))) trace = np.transpose([t.ravel() for t in x]) return np.reshape([mc_error(t, batches) for t in trace], dims[1:]) @@ -801,7 +846,7 @@ def mc_error(x, batches=5): @statfunc def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5), transform=lambda x: x): - R"""Returns a dictionary of requested quantiles from array + r"""Returns a dictionary of requested quantiles from array Parameters ---------- @@ -836,11 +881,13 @@ def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5), transform=lambda x: x): except IndexError: pm._log.warning("Too few elements for quantile calculation") + def dict2pd(statdict, labelname): """Small helper function to transform a diagnostics output dict into a pandas Series. """ from .backends import tracetab as ttab + var_dfs = [] for key, value in statdict.items(): var_df = pd.Series(value.flatten()) @@ -850,10 +897,19 @@ def dict2pd(statdict, labelname): statpd = statpd.rename(labelname) return statpd -def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None, - extend=False, include_transformed=False, - alpha=0.05, start=0, batches=None): - R"""Create a data frame with summary statistics. + +def summary( + trace, + varnames=None, + transform=lambda x: x, + stat_funcs=None, + extend=False, + include_transformed=False, + alpha=0.05, + start=0, + batches=None, +): + r"""Create a data frame with summary statistics. Parameters ---------- @@ -939,16 +995,19 @@ def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None, from .backends import tracetab as ttab if varnames is None: - varnames = get_default_varnames(trace.varnames, - include_transformed=include_transformed) + varnames = get_default_varnames( + trace.varnames, include_transformed=include_transformed + ) if batches is None: batches = min([100, len(trace)]) - funcs = [lambda x: pd.Series(np.mean(x, 0), name='mean'), - lambda x: pd.Series(np.std(x, 0), name='sd'), - lambda x: pd.Series(mc_error(x, batches), name='mc_error'), - lambda x: _hpd_df(x, alpha)] + funcs = [ + lambda x: pd.Series(np.mean(x, 0), name="mean"), + lambda x: pd.Series(np.std(x, 0), name="sd"), + lambda x: pd.Series(mc_error(x, batches), name="mc_error"), + lambda x: _hpd_df(x, alpha), + ] if stat_funcs is not None: if extend: @@ -970,16 +1029,15 @@ def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None, elif trace.nchains < 2: return dforg else: - n_eff = pm.effective_n(trace, - varnames=varnames, - include_transformed=include_transformed) - n_eff_pd = dict2pd(n_eff, 'n_eff') - rhat = pm.gelman_rubin(trace, - varnames=varnames, - include_transformed=include_transformed) - rhat_pd = dict2pd(rhat, 'Rhat') - return pd.concat([dforg, n_eff_pd, rhat_pd], - axis=1, join_axes=[dforg.index]) + n_eff = pm.effective_n( + trace, varnames=varnames, include_transformed=include_transformed + ) + n_eff_pd = dict2pd(n_eff, "n_eff") + rhat = pm.gelman_rubin( + trace, varnames=varnames, include_transformed=include_transformed + ) + rhat_pd = dict2pd(rhat, "Rhat") + return pd.concat([dforg, n_eff_pd, rhat_pd], axis=1, join_axes=[dforg.index]) def _calculate_stats(sample, batches, alpha): @@ -992,14 +1050,14 @@ def _calculate_stats(sample, batches, alpha): for idx in idxs: mean, sd, mce = [stat[idx] for stat in (means, sds, mces)] interval = intervals[idx].squeeze().tolist() - yield {'mean': mean, 'sd': sd, 'mce': mce, 'hpd': interval} + yield {"mean": mean, "sd": sd, "mce": mce, "hpd": interval} def _calculate_posterior_quantiles(sample, qlist): var_quantiles = quantiles(sample, qlist=qlist) # Replace ends of qlist with 'lo' and 'hi' - qends = {qlist[0]: 'lo', qlist[-1]: 'hi'} - qkeys = {q: qends[q] if q in qends else 'q{}'.format(q) for q in qlist} + qends = {qlist[0]: "lo", qlist[-1]: "hi"} + qkeys = {q: qends[q] if q in qends else "q{}".format(q) for q in qlist} for key, idxs in _groupby_leading_idxs(sample.shape[1:]): yield key for idx in idxs: @@ -1044,7 +1102,7 @@ def _groupby_leading_idxs(shape): def bfmi(trace): - R"""Calculate the estimated Bayesian fraction of missing information (BFMI). + r"""Calculate the estimated Bayesian fraction of missing information (BFMI). BFMI quantifies how well momentum resampling matches the marginal energy distribution. For more information on BFMI, see @@ -1063,13 +1121,13 @@ def bfmi(trace): z : float The Bayesian fraction of missing information of the model and trace. """ - energy = trace['energy'] + energy = trace["energy"] return np.square(np.diff(energy)).mean() / np.var(energy) def r2_score(y_true, y_pred, round_to=2): - R"""R-squared for Bayesian regression models. Only valid for linear models. + r"""R-squared for Bayesian regression models. Only valid for linear models. http://www.stat.columbia.edu/%7Egelman/research/unpublished/bayes_R2.pdf Parameters @@ -1099,6 +1157,5 @@ def r2_score(y_true, y_pred, round_to=2): r2_median = np.around(np.median(r2), round_to) r2_mean = np.around(np.mean(r2), round_to) r2_std = np.around(np.std(r2), round_to) - r2_r = namedtuple('r2_r', 'r2_median, r2_mean, r2_std') + r2_r = namedtuple("r2_r", "r2_median, r2_mean, r2_std") return r2_r(r2_median, r2_mean, r2_std) - diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index 8366bbb5c3..413887f98e 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -6,8 +6,7 @@ from numpy.random import uniform from enum import IntEnum, unique -__all__ = [ - 'ArrayStep', 'ArrayStepShared', 'metrop_select', 'Competence'] +__all__ = ["ArrayStep", "ArrayStepShared", "metrop_select", "Competence"] @unique @@ -19,6 +18,7 @@ class Competence(IntEnum): 2: PREFERRED 3: IDEAL """ + INCOMPATIBLE = 0 COMPATIBLE = 1 PREFERRED = 2 @@ -30,21 +30,21 @@ class BlockedStep(object): generates_stats = False def __new__(cls, *args, **kwargs): - blocked = kwargs.get('blocked') + blocked = kwargs.get("blocked") if blocked is None: # Try to look up default value from class - blocked = getattr(cls, 'default_blocked', True) - kwargs['blocked'] = blocked + blocked = getattr(cls, "default_blocked", True) + kwargs["blocked"] = blocked - model = modelcontext(kwargs.get('model')) - kwargs.update({'model':model}) + model = modelcontext(kwargs.get("model")) + kwargs.update({"model": model}) # vars can either be first arg or a kwarg - if 'vars' not in kwargs and len(args) >= 1: + if "vars" not in kwargs and len(args) >= 1: vars = args[0] args = args[1:] - elif 'vars' in kwargs: - vars = kwargs.pop('vars') + elif "vars" in kwargs: + vars = kwargs.pop("vars") else: # Assume all model variables vars = model.vars @@ -52,7 +52,7 @@ def __new__(cls, *args, **kwargs): vars = inputvars(vars) if len(vars) == 0: - raise ValueError('No free random variables to sample.') + raise ValueError("No free random variables to sample.") if not blocked and len(vars) > 1: # In this case we create a separate sampler for each var @@ -64,14 +64,14 @@ def __new__(cls, *args, **kwargs): # call __init__ step.__init__([var], *args, **kwargs) # Hack for creating the class correctly when unpickling. - step.__newargs = ([var], ) + args, kwargs + step.__newargs = ([var],) + args, kwargs steps.append(step) return CompoundStep(steps) else: step = super(BlockedStep, cls).__new__(cls) # Hack for creating the class correctly when unpickling. - step.__newargs = (vars, ) + args, kwargs + step.__newargs = (vars,) + args, kwargs return step # Hack for creating the class correctly when unpickling. @@ -104,7 +104,7 @@ def vars_shape_dtype(self): return shape_dtypes def stop_tuning(self): - if hasattr(self, 'tune'): + if hasattr(self, "tune"): self.tune = False @@ -212,20 +212,22 @@ def link_population(self, population, chain_index): self.this_chain = chain_index self.other_chains = [c for c in range(len(population)) if c != chain_index] if not len(self.other_chains) > 1: - raise ValueError('Population is just {} + {}. This is too small. You should ' \ - 'increase the number of chains.'.format(self.this_chain, self.other_chains)) + raise ValueError( + "Population is just {} + {}. This is too small. You should " + "increase the number of chains.".format( + self.this_chain, self.other_chains + ) + ) return class GradientSharedStep(BlockedStep): - def __init__(self, vars, model=None, blocked=True, - dtype=None, **theano_kwargs): + def __init__(self, vars, model=None, blocked=True, dtype=None, **theano_kwargs): model = modelcontext(model) self.vars = vars self.blocked = blocked - func = model.logp_dlogp_function( - vars, dtype=dtype, **theano_kwargs) + func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs) # handle edge case discovered in #2948 try: @@ -233,9 +235,8 @@ def __init__(self, vars, model=None, blocked=True, q = func.dict_to_array(model.test_point) logp, dlogp = func(q) except ValueError: - theano_kwargs.update(mode='FAST_COMPILE') - func = model.logp_dlogp_function( - vars, dtype=dtype, **theano_kwargs) + theano_kwargs.update(mode="FAST_COMPILE") + func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs) self._logp_dlogp_func = func diff --git a/pymc3/step_methods/compound.py b/pymc3/step_methods/compound.py index 8deb0555fd..fead664e5d 100644 --- a/pymc3/step_methods/compound.py +++ b/pymc3/step_methods/compound.py @@ -1,8 +1,8 @@ -''' +""" Created on Mar 7, 2011 @author: johnsalvatier -''' +""" import numpy as np @@ -12,8 +12,7 @@ class CompoundStep(object): def __init__(self, methods): self.methods = list(methods) - self.generates_stats = any( - method.generates_stats for method in self.methods) + self.generates_stats = any(method.generates_stats for method in self.methods) self.stats_dtypes = [] for method in self.methods: if method.generates_stats: @@ -32,7 +31,7 @@ def step(self, point): # one. Pop all others (if dict), or set to np.nan (if namedtuple). for state in states[:-1]: if isinstance(state, dict): - state.pop('model_logp', None) + state.pop("model_logp", None) elif isinstance(state, namedtuple): state = state._replace(logp=np.nan) return point, states @@ -44,7 +43,7 @@ def step(self, point): def warnings(self): warns = [] for method in self.methods: - if hasattr(method, 'warnings'): + if hasattr(method, "warnings"): warns.extend(method.warnings()) return warns diff --git a/pymc3/step_methods/elliptical_slice.py b/pymc3/step_methods/elliptical_slice.py index 5936c554c0..cd4cf47fdd 100644 --- a/pymc3/step_methods/elliptical_slice.py +++ b/pymc3/step_methods/elliptical_slice.py @@ -7,7 +7,7 @@ from ..theanof import inputvars from ..distributions import draw_values -__all__ = ['EllipticalSlice'] +__all__ = ["EllipticalSlice"] def get_chol(cov, chol): @@ -27,7 +27,7 @@ def get_chol(cov, chol): """ if len([i for i in [cov, chol] if i is not None]) != 1: - raise ValueError('Must pass exactly one of cov or chol') + raise ValueError("Must pass exactly one of cov or chol") if cov is not None: chol = tt.slinalg.cholesky(cov) @@ -69,8 +69,9 @@ class EllipticalSlice(ArrayStep): default_blocked = True - def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None, - **kwargs): + def __init__( + self, vars=None, prior_cov=None, prior_chol=None, model=None, **kwargs + ): self.model = modelcontext(model) chol = get_chol(prior_cov, prior_chol) self.prior_chol = tt.as_tensor_variable(chol) diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py index 92fc6b02a1..5f680c157f 100644 --- a/pymc3/step_methods/gibbs.py +++ b/pymc3/step_methods/gibbs.py @@ -1,18 +1,29 @@ -''' +""" Created on May 12, 2012 @author: john -''' +""" from .arraystep import ArrayStep, Competence from ..distributions.discrete import Categorical -from numpy import array, max, exp, cumsum, nested_iters, empty, searchsorted, ones, arange +from numpy import ( + array, + max, + exp, + cumsum, + nested_iters, + empty, + searchsorted, + ones, + arange, +) from numpy.random import uniform from warnings import warn from theano.gof.graph import inputs from theano.tensor import add from ..model import modelcontext -__all__ = ['ElemwiseCategorical'] + +__all__ = ["ElemwiseCategorical"] class ElemwiseCategorical(ArrayStep): @@ -21,13 +32,17 @@ class ElemwiseCategorical(ArrayStep): the variable can't be indexed into or transposed or anything otherwise that will mess things up """ + # TODO: It would be great to come up with a way to make # ElemwiseCategorical more general (handling more complex elementwise # variables) def __init__(self, vars, values=None, model=None): - warn('ElemwiseCategorical is deprecated, switch to CategoricalGibbsMetropolis.', - DeprecationWarning, stacklevel = 2) + warn( + "ElemwiseCategorical is deprecated, switch to CategoricalGibbsMetropolis.", + DeprecationWarning, + stacklevel=2, + ) model = modelcontext(model) self.var = vars[0] self.sh = ones(self.var.dshape, self.var.dtype) @@ -37,7 +52,8 @@ def __init__(self, vars, values=None, model=None): self.values = values super(ElemwiseCategorical, self).__init__( - vars, [elemwise_logp(model, self.var)]) + vars, [elemwise_logp(model, self.var)] + ) def astep(self, q, logp): p = array([logp(v * self.sh) for v in self.values]) @@ -51,8 +67,7 @@ def competence(var, has_grad): def elemwise_logp(model, var): - terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([ - v.logpt])] + terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([v.logpt])] return model.fn(add(*terms)) @@ -60,9 +75,12 @@ def categorical(prob, shape): out = empty([1] + list(shape)) n = len(shape) - it0, it1 = nested_iters([prob, out], [list(range(1, n + 1)), [0]], - op_flags=[['readonly'], ['readwrite']], - flags=['reduce_ok']) + it0, it1 = nested_iters( + [prob, out], + [list(range(1, n + 1)), [0]], + op_flags=[["readonly"], ["readwrite"]], + flags=["reduce_ok"], + ) for _ in it0: p, o = it1.itviews diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py index 96ad165888..0eecfbc391 100644 --- a/pymc3/step_methods/hmc/base_hmc.py +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -10,16 +10,13 @@ from .quadpotential import quad_potential, QuadPotentialDiagAdapt from pymc3.step_methods import step_sizes from pymc3.backends.report import SamplerWarning, WarningType -logger = logging.getLogger('pymc3') -HMCStepData = namedtuple( - "HMCStepData", - "end, accept_stat, divergence_info, stats") +logger = logging.getLogger("pymc3") +HMCStepData = namedtuple("HMCStepData", "end, accept_stat, divergence_info, stats") -DivergenceInfo = namedtuple( - 'DivergenceInfo', - 'message, exec_info, state') + +DivergenceInfo = namedtuple("DivergenceInfo", "message, exec_info, state") class BaseHMC(arraystep.GradientSharedStep): @@ -27,12 +24,26 @@ class BaseHMC(arraystep.GradientSharedStep): default_blocked = True - def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, - model=None, blocked=True, potential=None, - integrator="leapfrog", dtype=None, Emax=1000, - target_accept=0.8, gamma=0.05, k=0.75, t0=10, - adapt_step_size=True, step_rand=None, - **theano_kwargs): + def __init__( + self, + vars=None, + scaling=None, + step_scale=0.25, + is_cov=False, + model=None, + blocked=True, + potential=None, + integrator="leapfrog", + dtype=None, + Emax=1000, + target_accept=0.8, + gamma=0.05, + k=0.75, + t0=10, + adapt_step_size=True, + step_rand=None, + **theano_kwargs + ): """Set up Hamiltonian samplers with common structures. Parameters @@ -59,8 +70,9 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, vars = self._model.cont_vars vars = inputvars(vars) - super(BaseHMC, self).__init__(vars, blocked=blocked, model=model, - dtype=dtype, **theano_kwargs) + super(BaseHMC, self).__init__( + vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs + ) self.adapt_step_size = adapt_step_size self.Emax = Emax @@ -70,7 +82,8 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, self.step_size = step_scale / (size ** 0.25) self.target_accept = target_accept self.step_adapt = step_sizes.DualAverageAdaptation( - self.step_size, target_accept, gamma, k, t0) + self.step_size, target_accept, gamma, k, t0 + ) self.tune = True @@ -92,7 +105,8 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, self.potential = quad_potential(scaling, is_cov) self.integrator = integration.CpuLeapfrogIntegrator( - self.potential, self._logp_dlogp_func) + self.potential, self._logp_dlogp_func + ) self._step_rand = step_rand self._warnings = [] @@ -111,13 +125,19 @@ def astep(self, q0): p0 = self.potential.random() start = self.integrator.compute_state(q0, p0) model = self._model - + if not np.isfinite(start.energy): check_test_point = model.check_test_point() - error_logp = check_test_point.loc[(np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point)] + error_logp = check_test_point.loc[ + (np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point) + ] self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap) - logger.error("Bad initial energy, check any log probabilities that are inf or -inf, nan or very small:\n{}".format(error_logp.to_string())) - raise ValueError('Bad initial energy') + logger.error( + "Bad initial energy, check any log probabilities that are inf or -inf, nan or very small:\n{}".format( + error_logp.to_string() + ) + ) + raise ValueError("Bad initial energy") adapt_step = self.tune and self.adapt_step_size step_size = self.step_adapt.current(adapt_step) @@ -144,8 +164,8 @@ def astep(self, q0): else: point = None warning = SamplerWarning( - kind, info.message, 'debug', self.iter_count, - info.exec_info, point) + kind, info.message, "debug", self.iter_count, info.exec_info, point + ) self._warnings.append(warning) @@ -153,10 +173,7 @@ def astep(self, q0): if not self.tune: self._samples_after_tune += 1 - stats = { - 'tune': self.tune, - 'diverging': bool(hmc_step.divergence_info), - } + stats = {"tune": self.tune, "diverging": bool(hmc_step.divergence_info)} stats.update(hmc_step.stats) stats.update(self.step_adapt.stats()) @@ -172,21 +189,28 @@ def warnings(self): warnings = self._warnings[:] # Generate a global warning for divergences - message = '' + message = "" n_divs = self._num_divs_sample if n_divs and self._samples_after_tune == n_divs: - message = ('The chain contains only diverging samples. The model ' - 'is probably misspecified.') + message = ( + "The chain contains only diverging samples. The model " + "is probably misspecified." + ) elif n_divs == 1: - message = ('There was 1 divergence after tuning. Increase ' - '`target_accept` or reparameterize.') + message = ( + "There was 1 divergence after tuning. Increase " + "`target_accept` or reparameterize." + ) elif n_divs > 1: - message = ('There were %s divergences after tuning. Increase ' - '`target_accept` or reparameterize.' % n_divs) + message = ( + "There were %s divergences after tuning. Increase " + "`target_accept` or reparameterize." % n_divs + ) if message: warning = SamplerWarning( - WarningType.DIVERGENCES, message, 'error', None, None, None) + WarningType.DIVERGENCES, message, "error", None, None, None + ) warnings.append(warning) warnings.extend(self.step_adapt.warnings()) diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py index e918eca43c..302c312b08 100644 --- a/pymc3/step_methods/hmc/hmc.py +++ b/pymc3/step_methods/hmc/hmc.py @@ -6,40 +6,50 @@ from pymc3.step_methods.hmc.base_hmc import BaseHMC, HMCStepData, DivergenceInfo -__all__ = ['HamiltonianMC'] +__all__ = ["HamiltonianMC"] -def unif(step_size, elow=.85, ehigh=1.15): +def unif(step_size, elow=0.85, ehigh=1.15): return np.random.uniform(elow, ehigh) * step_size class HamiltonianMC(BaseHMC): - R"""A sampler for continuous variables based on Hamiltonian mechanics. + r"""A sampler for continuous variables based on Hamiltonian mechanics. See NUTS sampler for automatically tuned stopping time and step size scaling. """ - name = 'hmc' + name = "hmc" default_blocked = True generates_stats = True - stats_dtypes = [{ - 'step_size': np.float64, - 'n_steps': np.int64, - 'tune': np.bool, - 'step_size_bar': np.float64, - 'accept': np.float64, - 'diverging': np.bool, - 'energy_error': np.float64, - 'energy': np.float64, - 'max_energy_error': np.float64, - 'path_length': np.float64, - 'accepted': np.bool, - 'model_logp': np.float64, - }] - - def __init__(self, vars=None, path_length=2., - adapt_step_size=True, gamma=0.05, k=0.75, t0=10, - target_accept=0.8, **kwargs): + stats_dtypes = [ + { + "step_size": np.float64, + "n_steps": np.int64, + "tune": np.bool, + "step_size_bar": np.float64, + "accept": np.float64, + "diverging": np.bool, + "energy_error": np.float64, + "energy": np.float64, + "max_energy_error": np.float64, + "path_length": np.float64, + "accepted": np.bool, + "model_logp": np.float64, + } + ] + + def __init__( + self, + vars=None, + path_length=2.0, + adapt_step_size=True, + gamma=0.05, + k=0.75, + t0=10, + target_accept=0.8, + **kwargs + ): """Set up the Hamiltonian Monte Carlo sampler. Parameters @@ -99,16 +109,17 @@ def _hamiltonian_step(self, start, p0, step_size): for _ in range(n_steps): state = self.integrator.step(step_size, state) except IntegrationError as e: - div_info = DivergenceInfo('Divergence encountered.', e, state) + div_info = DivergenceInfo("Divergence encountered.", e, state) else: if not np.isfinite(state.energy): div_info = DivergenceInfo( - 'Divergence encountered, bad energy.', None, state) + "Divergence encountered, bad energy.", None, state + ) energy_change = start.energy - state.energy if np.abs(energy_change) > self.Emax: div_info = DivergenceInfo( - 'Divergence encountered, large integration error.', - None, state) + "Divergence encountered, large integration error.", None, state + ) accept_stat = min(1, np.exp(energy_change)) @@ -120,13 +131,13 @@ def _hamiltonian_step(self, start, p0, step_size): accepted = True stats = { - 'path_length': path_length, - 'n_steps': n_steps, - 'accept': accept_stat, - 'energy_error': energy_change, - 'energy': state.energy, - 'accepted': accepted, - 'model_logp': state.model_logp, + "path_length": path_length, + "n_steps": n_steps, + "accept": accept_stat, + "energy_error": energy_change, + "energy": state.energy, + "accepted": accepted, + "model_logp": state.model_logp, } return HMCStepData(end, accept_stat, div_info, stats) diff --git a/pymc3/step_methods/hmc/integration.py b/pymc3/step_methods/hmc/integration.py index f64e876dcc..1ee9905201 100644 --- a/pymc3/step_methods/hmc/integration.py +++ b/pymc3/step_methods/hmc/integration.py @@ -4,7 +4,7 @@ from scipy import linalg -State = namedtuple("State", 'q, p, v, q_grad, energy, model_logp') +State = namedtuple("State", "q, p, v, q_grad, energy, model_logp") class IntegrationError(RuntimeError): @@ -18,14 +18,15 @@ def __init__(self, potential, logp_dlogp_func): self._logp_dlogp_func = logp_dlogp_func self._dtype = self._logp_dlogp_func.dtype if self._potential.dtype != self._dtype: - raise ValueError("dtypes of potential (%s) and logp function (%s)" - "don't match." - % (self._potential.dtype, self._dtype)) + raise ValueError( + "dtypes of potential (%s) and logp function (%s)" + "don't match." % (self._potential.dtype, self._dtype) + ) def compute_state(self, q, p): """Compute Hamiltonian functions using a position and momentum.""" if q.dtype != self._dtype or p.dtype != self._dtype: - raise ValueError('Invalid dtype. Must be %s' % self._dtype) + raise ValueError("Invalid dtype. Must be %s" % self._dtype) logp, dlogp = self._logp_dlogp_func(q) v = self._potential.velocity(p) kinetic = self._potential.energy(p, velocity=v) @@ -66,7 +67,7 @@ def step(self, epsilon, state, out=None): def _step(self, epsilon, state, out=None): pot = self._potential - axpy = linalg.blas.get_blas_funcs('axpy', dtype=self._dtype) + axpy = linalg.blas.get_blas_funcs("axpy", dtype=self._dtype) q, p, v, q_grad, energy, logp = state if out is None: diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index 92fbe10f99..226e865116 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -12,7 +12,7 @@ from pymc3.theanof import floatX from pymc3.vartypes import continuous_types -__all__ = ['NUTS'] +__all__ = ["NUTS"] def logbern(log_p): @@ -22,7 +22,7 @@ def logbern(log_p): class NUTS(BaseHMC): - R"""A sampler for continuous variables based on Hamiltonian mechanics. + r"""A sampler for continuous variables based on Hamiltonian mechanics. NUTS automatically tunes the step size and the number of steps per sample. A detailed description can be found at [1], "Algorithm 6: @@ -72,27 +72,28 @@ class NUTS(BaseHMC): Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo. """ - name = 'nuts' + name = "nuts" default_blocked = True generates_stats = True - stats_dtypes = [{ - 'depth': np.int64, - 'step_size': np.float64, - 'tune': np.bool, - 'mean_tree_accept': np.float64, - 'step_size_bar': np.float64, - 'tree_size': np.float64, - 'diverging': np.bool, - 'energy_error': np.float64, - 'energy': np.float64, - 'max_energy_error': np.float64, - 'model_logp': np.float64, - }] - - def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, - **kwargs): - R"""Set up the No-U-Turn sampler. + stats_dtypes = [ + { + "depth": np.int64, + "step_size": np.float64, + "tune": np.bool, + "mean_tree_accept": np.float64, + "step_size_bar": np.float64, + "tree_size": np.float64, + "diverging": np.bool, + "energy_error": np.float64, + "energy": np.float64, + "max_energy_error": np.float64, + "model_logp": np.float64, + } + ] + + def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs): + r"""Set up the No-U-Turn sampler. Parameters ---------- @@ -176,7 +177,7 @@ def _hamiltonian_step(self, start, p0, step_size): self._reached_max_treedepth += 1 stats = tree.stats() - accept_stat = stats['mean_tree_accept'] + accept_stat = stats["mean_tree_accept"] return HMCStepData(tree.proposal, accept_stat, divergence_info, stats) @staticmethod @@ -192,10 +193,11 @@ def warnings(self): n_treedepth = self._reached_max_treedepth if n_samples > 0 and n_treedepth / float(n_samples) > 0.05: - msg = ('The chain reached the maximum tree depth. Increase ' - 'max_treedepth, increase target_accept or reparameterize.') - warn = SamplerWarning(WarningType.TREEDEPTH, msg, 'warn', - None, None, None) + msg = ( + "The chain reached the maximum tree depth. Increase " + "max_treedepth, increase target_accept or reparameterize." + ) + warn = SamplerWarning(WarningType.TREEDEPTH, msg, "warn", None, None, None) warnings.append(warn) return warnings @@ -205,8 +207,8 @@ def warnings(self): # A subtree of the binary tree built by nuts. Subtree = namedtuple( - "Subtree", - "left, right, p_sum, proposal, log_size, accept_sum, n_proposals") + "Subtree", "left, right, p_sum, proposal, log_size, accept_sum, n_proposals" +) class _Tree(object): @@ -234,7 +236,8 @@ def __init__(self, ndim, integrator, start, step_size, Emax): self.left = self.right = start self.proposal = Proposal( - start.q, start.q_grad, start.energy, 1.0, start.model_logp) + start.q, start.q_grad, start.energy, 1.0, start.model_logp + ) self.depth = 0 self.log_size = 0 self.accept_sum = 0 @@ -256,11 +259,13 @@ def extend(self, direction): """ if direction > 0: tree, diverging, turning = self._build_subtree( - self.right, self.depth, floatX(np.asarray(self.step_size))) + self.right, self.depth, floatX(np.asarray(self.step_size)) + ) self.right = tree.right else: tree, diverging, turning = self._build_subtree( - self.left, self.depth, floatX(np.asarray(-self.step_size))) + self.left, self.depth, floatX(np.asarray(-self.step_size)) + ) self.left = tree.right self.depth += 1 @@ -301,13 +306,14 @@ def _single_step(self, left, epsilon): p_accept = min(1, np.exp(-energy_change)) log_size = -energy_change proposal = Proposal( - right.q, right.q_grad, right.energy, p_accept, right.model_logp) - tree = Subtree(right, right, right.p, - proposal, log_size, p_accept, 1) + right.q, right.q_grad, right.energy, p_accept, right.model_logp + ) + tree = Subtree(right, right, right.p, proposal, log_size, p_accept, 1) return tree, None, False else: - error_msg = ("Energy change in leapfrog step is too large: %s." - % energy_change) + error_msg = ( + "Energy change in leapfrog step is too large: %s." % energy_change + ) error = None tree = Subtree(None, None, None, None, -np.inf, 0, 1) divergance_info = DivergenceInfo(error_msg, error, left) @@ -317,13 +323,11 @@ def _build_subtree(self, left, depth, epsilon): if depth == 0: return self._single_step(left, epsilon) - tree1, diverging, turning = self._build_subtree( - left, depth - 1, epsilon) + tree1, diverging, turning = self._build_subtree(left, depth - 1, epsilon) if diverging or turning: return tree1, diverging, turning - tree2, diverging, turning = self._build_subtree( - tree1.right, depth - 1, epsilon) + tree2, diverging, turning = self._build_subtree(tree1.right, depth - 1, epsilon) left, right = tree1.left, tree2.right @@ -344,17 +348,16 @@ def _build_subtree(self, left, depth, epsilon): accept_sum = tree1.accept_sum + tree2.accept_sum n_proposals = tree1.n_proposals + tree2.n_proposals - tree = Subtree(left, right, p_sum, proposal, - log_size, accept_sum, n_proposals) + tree = Subtree(left, right, p_sum, proposal, log_size, accept_sum, n_proposals) return tree, diverging, turning def stats(self): return { - 'depth': self.depth, - 'mean_tree_accept': self.accept_sum / self.n_proposals, - 'energy_error': self.proposal.energy - self.start.energy, - 'energy': self.proposal.energy, - 'tree_size': self.n_proposals, - 'max_energy_error': self.max_energy_change, - 'model_logp': self.proposal.logp, + "depth": self.depth, + "mean_tree_accept": self.accept_sum / self.n_proposals, + "energy_error": self.proposal.energy - self.start.energy, + "energy": self.proposal.energy, + "tree_size": self.n_proposals, + "max_energy_error": self.max_energy_change, + "model_logp": self.proposal.logp, } diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 6c67a8f1c3..1142948530 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -7,8 +7,14 @@ from pymc3.theanof import floatX -__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull', - 'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', 'isquadpotential'] +__all__ = [ + "quad_potential", + "QuadPotentialDiag", + "QuadPotentialFull", + "QuadPotentialFullInv", + "QuadPotentialDiagAdapt", + "isquadpotential", +] def quad_potential(C, is_cov): @@ -40,7 +46,7 @@ def quad_potential(C, is_cov): if is_cov: return QuadPotentialDiag(C) else: - return QuadPotentialDiag(1. / C) + return QuadPotentialDiag(1.0 / C) else: if is_cov: return QuadPotentialFull(C) @@ -58,7 +64,8 @@ def partial_check_positive_definite(C): if len(i): raise PositiveDefiniteError( - "Simple check failed. Diagonal contains negatives", i) + "Simple check failed. Diagonal contains negatives", i + ) class PositiveDefiniteError(ValueError): @@ -68,23 +75,25 @@ def __init__(self, msg, idx): self.msg = msg def __str__(self): - return ("Scaling is not positive definite: %s. Check indexes %s." - % (self.msg, self.idx)) + return "Scaling is not positive definite: %s. Check indexes %s." % ( + self.msg, + self.idx, + ) class QuadPotential(object): def velocity(self, x, out=None): """Compute the current velocity at a position in parameter space.""" - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") def energy(self, x, velocity=None): - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") def random(self, x): - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") def velocity_energy(self, x, v_out): - raise NotImplementedError('Abstract method') + raise NotImplementedError("Abstract method") def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning. @@ -124,19 +133,30 @@ def isquadpotential(value): class QuadPotentialDiagAdapt(QuadPotential): """Adapt a diagonal mass matrix from the sample variances.""" - def __init__(self, n, initial_mean, initial_diag=None, initial_weight=0, - adaptation_window=101, dtype=None): + def __init__( + self, + n, + initial_mean, + initial_diag=None, + initial_weight=0, + adaptation_window=101, + dtype=None, + ): """Set up a diagonal mass matrix.""" if initial_diag is not None and initial_diag.ndim != 1: - raise ValueError('Initial diagonal must be one-dimensional.') + raise ValueError("Initial diagonal must be one-dimensional.") if initial_mean.ndim != 1: - raise ValueError('Initial mean must be one-dimensional.') + raise ValueError("Initial mean must be one-dimensional.") if initial_diag is not None and len(initial_diag) != n: - raise ValueError('Wrong shape for initial_diag: expected %s got %s' - % (n, len(initial_diag))) + raise ValueError( + "Wrong shape for initial_diag: expected %s got %s" + % (n, len(initial_diag)) + ) if len(initial_mean) != n: - raise ValueError('Wrong shape for initial_mean: expected %s got %s' - % (n, len(initial_mean))) + raise ValueError( + "Wrong shape for initial_mean: expected %s got %s" + % (n, len(initial_mean)) + ) if dtype is None: dtype = theano.config.floatX @@ -150,9 +170,10 @@ def __init__(self, n, initial_mean, initial_diag=None, initial_weight=0, self._var = np.array(initial_diag, dtype=self.dtype, copy=True) self._var_theano = theano.shared(self._var) self._stds = np.sqrt(initial_diag) - self._inv_stds = floatX(1.) / self._stds + self._inv_stds = floatX(1.0) / self._stds self._foreground_var = _WeightedVariance( - self._n, initial_mean, initial_diag, initial_weight, self.dtype) + self._n, initial_mean, initial_diag, initial_weight, self.dtype + ) self._background_var = _WeightedVariance(self._n, dtype=self.dtype) self._n_samples = 0 self.adaptation_window = adaptation_window @@ -224,11 +245,13 @@ def raise_ok(self, vmap): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(self._stds == 0)[0] - errmsg = ['Mass matrix contains zeros on the diagonal. '] + errmsg = ["Mass matrix contains zeros on the diagonal. "] for ii in index: - errmsg.append('The derivative of RV `{}`.ravel()[{}]' - ' is zero.'.format(*name_slc[ii])) - raise ValueError('\n'.join(errmsg)) + errmsg.append( + "The derivative of RV `{}`.ravel()[{}]" + " is zero.".format(*name_slc[ii]) + ) + raise ValueError("\n".join(errmsg)) if np.any(~np.isfinite(self._stds)): name_slc = [] @@ -238,11 +261,13 @@ def raise_ok(self, vmap): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(~np.isfinite(self._stds))[0] - errmsg = ['Mass matrix contains non-finite values on the diagonal. '] + errmsg = ["Mass matrix contains non-finite values on the diagonal. "] for ii in index: - errmsg.append('The derivative of RV `{}`.ravel()[{}]' - ' is non-finite.'.format(*name_slc[ii])) - raise ValueError('\n'.join(errmsg)) + errmsg.append( + "The derivative of RV `{}`.ravel()[{}]" + " is non-finite.".format(*name_slc[ii]) + ) + raise ValueError("\n".join(errmsg)) class QuadPotentialDiagAdaptGrad(QuadPotentialDiagAdapt): @@ -289,26 +314,32 @@ def update(self, sample, grad, tune): class _WeightedVariance(object): """Online algorithm for computing mean of variance.""" - def __init__(self, nelem, initial_mean=None, initial_variance=None, - initial_weight=0, dtype='d'): + def __init__( + self, + nelem, + initial_mean=None, + initial_variance=None, + initial_weight=0, + dtype="d", + ): self._dtype = dtype self.w_sum = float(initial_weight) self.w_sum2 = float(initial_weight) ** 2 if initial_mean is None: - self.mean = np.zeros(nelem, dtype='d') + self.mean = np.zeros(nelem, dtype="d") else: - self.mean = np.array(initial_mean, dtype='d', copy=True) + self.mean = np.array(initial_mean, dtype="d", copy=True) if initial_variance is None: - self.raw_var = np.zeros(nelem, dtype='d') + self.raw_var = np.zeros(nelem, dtype="d") else: - self.raw_var = np.array(initial_variance, dtype='d', copy=True) + self.raw_var = np.array(initial_variance, dtype="d", copy=True) self.raw_var[:] *= self.w_sum if self.raw_var.shape != (nelem,): - raise ValueError('Invalid shape for initial variance.') + raise ValueError("Invalid shape for initial variance.") if self.mean.shape != (nelem,): - raise ValueError('Invalid shape for initial mean.') + raise ValueError("Invalid shape for initial mean.") def add_sample(self, x, weight): x = np.asarray(x) @@ -322,7 +353,7 @@ def add_sample(self, x, weight): def current_variance(self, out=None): if self.w_sum == 0: - raise ValueError('Can not compute variance without samples.') + raise ValueError("Can not compute variance without samples.") if out is not None: return np.divide(self.raw_var, self.w_sum, out=out) else: @@ -347,10 +378,10 @@ def __init__(self, v, dtype=None): dtype = theano.config.floatX self.dtype = dtype v = v.astype(self.dtype) - s = v ** .5 + s = v ** 0.5 self.s = s - self.inv_s = 1. / s + self.inv_s = 1.0 / s self.v = v def velocity(self, x, out=None): @@ -368,7 +399,7 @@ def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" if velocity is not None: return 0.5 * np.dot(x, velocity) - return .5 * x.dot(self.v * x) + return 0.5 * x.dot(self.v * x) def velocity_energy(self, x, v_out): """Compute velocity and return kinetic energy at a position in parameter space.""" @@ -408,7 +439,7 @@ def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" if velocity is None: velocity = self.velocity(x) - return .5 * x.dot(velocity) + return 0.5 * x.dot(velocity) def velocity_energy(self, x, v_out): """Compute velocity and return kinetic energy at a position in parameter space.""" @@ -446,7 +477,7 @@ def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" if velocity is None: velocity = self.velocity(x) - return .5 * x.dot(velocity) + return 0.5 * x.dot(velocity) def velocity_energy(self, x, v_out): """Compute velocity and return kinetic energy at a position in parameter space.""" @@ -458,12 +489,13 @@ def velocity_energy(self, x, v_out): try: import sksparse.cholmod as cholmod + chol_available = True except ImportError: chol_available = False if chol_available: - __all__ += ['QuadPotentialSparse'] + __all__ += ["QuadPotentialSparse"] import theano.sparse diff --git a/pymc3/step_methods/hmc/trajectory.py b/pymc3/step_methods/hmc/trajectory.py index 70efcb2259..865ac039e8 100644 --- a/pymc3/step_methods/hmc/trajectory.py +++ b/pymc3/step_methods/hmc/trajectory.py @@ -48,10 +48,12 @@ def _theano_energy_function(H, q, **theano_kwargs): energy_function : theano function that computes the energy at a point (p, q) in phase space p : Starting momentum variable. """ - p = tt.vector('p') + p = tt.vector("p") p.tag.test_value = q.tag.test_value total_energy = H.pot.energy(p) - H.logp(q) - energy_function = theano.function(inputs=[q, p], outputs=total_energy, **theano_kwargs) + energy_function = theano.function( + inputs=[q, p], outputs=total_energy, **theano_kwargs + ) energy_function.trust_input = True return energy_function, p @@ -81,23 +83,31 @@ def _theano_leapfrog_integrator(H, q, p, **theano_kwargs): theano function which returns q_new, p_new, energy_new """ - epsilon = tt.scalar('epsilon') - epsilon.tag.test_value = 1. + epsilon = tt.scalar("epsilon") + epsilon.tag.test_value = 1.0 - n_steps = tt.iscalar('n_steps') + n_steps = tt.iscalar("n_steps") n_steps.tag.test_value = 2 q_new, p_new = leapfrog(H, q, p, epsilon, n_steps) energy_new = energy(H, q_new, p_new) - f = theano.function([q, p, epsilon, n_steps], [q_new, p_new, energy_new], **theano_kwargs) + f = theano.function( + [q, p, epsilon, n_steps], [q_new, p_new, energy_new], **theano_kwargs + ) f.trust_input = True return f -def get_theano_hamiltonian_functions(model_vars, shared, logpt, potential, - use_single_leapfrog=False, - integrator="leapfrog", **theano_kwargs): +def get_theano_hamiltonian_functions( + model_vars, + shared, + logpt, + potential, + use_single_leapfrog=False, + integrator="leapfrog", + **theano_kwargs +): """Construct theano functions for the Hamiltonian, energy, and leapfrog integrator. Parameters @@ -174,15 +184,19 @@ def leapfrog(H, q, p, epsilon, n_steps): momentum : Theano.tensor momentum estimate at time :math:`n \cdot e`. """ + def full_update(p, q): p = p + epsilon * H.dlogp(q) q += epsilon * H.pot.velocity(p) return p, q + # This first line can't be +=, possibly because of theano p = p + 0.5 * epsilon * H.dlogp(q) # half momentum update q += epsilon * H.pot.velocity(p) # full position update if tt.gt(n_steps, 1): - (p_seq, q_seq), _ = theano.scan(full_update, outputs_info=[p, q], n_steps=n_steps - 1) + (p_seq, q_seq), _ = theano.scan( + full_update, outputs_info=[p, q], n_steps=n_steps - 1 + ) p, q = p_seq[-1], q_seq[-1] p += 0.5 * epsilon * H.dlogp(q) # half momentum update return q, p @@ -203,8 +217,8 @@ def _theano_single_threestage(H, q, p, q_grad, **theano_kwargs): Hamiltonian Monte Carlo." arXiv:1608.07048 [Stat], August 25, 2016. http://arxiv.org/abs/1608.07048. """ - epsilon = tt.scalar('epsilon') - epsilon.tag.test_value = 1. + epsilon = tt.scalar("epsilon") + epsilon.tag.test_value = 1.0 a = 12127897.0 / 102017882 b = 4271554.0 / 14421423 @@ -227,9 +241,11 @@ def _theano_single_threestage(H, q, p, q_grad, **theano_kwargs): new_energy = energy(H, q_e, p_e) - f = theano.function(inputs=[q, p, q_grad, epsilon], - outputs=[q_e, p_e, v_e, grad_e, new_energy], - **theano_kwargs) + f = theano.function( + inputs=[q, p, q_grad, epsilon], + outputs=[q_e, p_e, v_e, grad_e, new_energy], + **theano_kwargs + ) f.trust_input = True return f @@ -249,8 +265,8 @@ def _theano_single_twostage(H, q, p, q_grad, **theano_kwargs): Hamiltonian Monte Carlo." arXiv:1608.07048 [Stat], August 25, 2016. http://arxiv.org/abs/1608.07048. """ - epsilon = tt.scalar('epsilon') - epsilon.tag.test_value = 1. + epsilon = tt.scalar("epsilon") + epsilon.tag.test_value = 1.0 a = floatX((3 - np.sqrt(3)) / 6) @@ -263,9 +279,11 @@ def _theano_single_twostage(H, q, p, q_grad, **theano_kwargs): v_e = H.pot.velocity(p_e) new_energy = energy(H, q_e, p_e) - f = theano.function(inputs=[q, p, q_grad, epsilon], - outputs=[q_e, p_e, v_e, grad_e, new_energy], - **theano_kwargs) + f = theano.function( + inputs=[q, p, q_grad, epsilon], + outputs=[q_e, p_e, v_e, grad_e, new_energy], + **theano_kwargs + ) f.trust_input = True return f @@ -276,8 +294,8 @@ def _theano_single_leapfrog(H, q, p, q_grad, **theano_kwargs): See above for documentation. This is optimized for the case where only a single step is needed, in case of, for example, a recursive algorithm. """ - epsilon = tt.scalar('epsilon') - epsilon.tag.test_value = 1. + epsilon = tt.scalar("epsilon") + epsilon.tag.test_value = 1.0 p_new = p + 0.5 * epsilon * q_grad # half momentum update q_new = q + epsilon * H.pot.velocity(p_new) # full position update @@ -286,15 +304,17 @@ def _theano_single_leapfrog(H, q, p, q_grad, **theano_kwargs): energy_new = energy(H, q_new, p_new) v_new = H.pot.velocity(p_new) - f = theano.function(inputs=[q, p, q_grad, epsilon], - outputs=[q_new, p_new, v_new, q_new_grad, energy_new], - **theano_kwargs) + f = theano.function( + inputs=[q, p, q_grad, epsilon], + outputs=[q_new, p_new, v_new, q_new_grad, energy_new], + **theano_kwargs + ) f.trust_input = True return f INTEGRATORS_SINGLE = { - 'leapfrog': _theano_single_leapfrog, - 'two-stage': _theano_single_twostage, - 'three-stage': _theano_single_threestage, + "leapfrog": _theano_single_leapfrog, + "two-stage": _theano_single_twostage, + "three-stage": _theano_single_threestage, } diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 1b0228bc73..2958ad9438 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -5,13 +5,28 @@ import warnings from ..distributions import draw_values -from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence +from .arraystep import ( + ArrayStepShared, + PopulationArrayStepShared, + ArrayStep, + metrop_select, + Competence, +) import pymc3 as pm from pymc3.theanof import floatX -__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis', - 'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal', - 'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal'] +__all__ = [ + "Metropolis", + "DEMetropolis", + "BinaryMetropolis", + "BinaryGibbsMetropolis", + "CategoricalGibbsMetropolis", + "NormalProposal", + "CauchyProposal", + "LaplaceProposal", + "PoissonProposal", + "MultivariateNormalProposal", +] # Available proposal distributions for Metropolis @@ -39,7 +54,9 @@ def __call__(self): class LaplaceProposal(Proposal): def __call__(self): size = np.size(self.s) - return (nr.standard_exponential(size=size) - nr.standard_exponential(size=size)) * self.s + return ( + nr.standard_exponential(size=size) - nr.standard_exponential(size=size) + ) * self.s class PoissonProposal(Proposal): @@ -88,17 +105,25 @@ class Metropolis(ArrayStepShared): mode : string or `Mode` instance. compilation mode passed to Theano functions """ - name = 'metropolis' + + name = "metropolis" default_blocked = False generates_stats = True - stats_dtypes = [{ - 'accept': np.float64, - 'tune': np.bool, - }] - - def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1., - tune=True, tune_interval=100, model=None, mode=None, **kwargs): + stats_dtypes = [{"accept": np.float64, "tune": np.bool}] + + def __init__( + self, + vars=None, + S=None, + proposal_dist=None, + scaling=1.0, + tune=True, + tune_interval=100, + model=None, + mode=None, + **kwargs + ): model = pm.modelcontext(model) @@ -118,7 +143,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1., else: raise ValueError("Invalid rank for variance: %s" % S.ndim) - self.scaling = np.atleast_1d(scaling).astype('d') + self.scaling = np.atleast_1d(scaling).astype("d") self.tune = tune self.tune_interval = tune_interval self.steps_until_tune = tune_interval @@ -126,7 +151,8 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1., # Determine type of variables self.discrete = np.concatenate( - [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars]) + [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars] + ) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() @@ -139,8 +165,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1., def astep(self, q0): if not self.steps_until_tune and self.tune: # Tune scaling parameter - self.scaling = tune( - self.scaling, self.accepted / float(self.tune_interval)) + self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) # Reset counter self.steps_until_tune = self.tune_interval self.accepted = 0 @@ -149,13 +174,12 @@ def astep(self, q0): if self.any_discrete: if self.all_discrete: - delta = np.round(delta, 0).astype('int64') - q0 = q0.astype('int64') - q = (q0 + delta).astype('int64') + delta = np.round(delta, 0).astype("int64") + q0 = q0.astype("int64") + q = (q0 + delta).astype("int64") else: - delta[self.discrete] = np.round( - delta[self.discrete], 0) - q = (q0 + delta) + delta[self.discrete] = np.round(delta[self.discrete], 0) + q = q0 + delta else: q = floatX(q0 + delta) @@ -165,10 +189,7 @@ def astep(self, q0): self.steps_until_tune -= 1 - stats = { - 'tune': self.tune, - 'accept': np.exp(accept), - } + stats = {"tune": self.tune, "accept": np.exp(accept)} return q_new, [stats] @@ -233,16 +254,13 @@ class BinaryMetropolis(ArrayStep): Optional model for sampling step. Defaults to None (taken from context). """ - name = 'binary_metropolis' + + name = "binary_metropolis" generates_stats = True - stats_dtypes = [{ - 'accept': np.float64, - 'tune': np.bool, - 'p_jump': np.float64, - }] + stats_dtypes = [{"accept": np.float64, "tune": np.bool, "p_jump": np.float64}] - def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None): + def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None): model = pm.modelcontext(model) @@ -253,42 +271,36 @@ def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None): self.accepted = 0 if not all([v.dtype in pm.discrete_types for v in vars]): - raise ValueError( - 'All variables must be Bernoulli for BinaryMetropolis') + raise ValueError("All variables must be Bernoulli for BinaryMetropolis") super(BinaryMetropolis, self).__init__(vars, [model.fastlogp]) def astep(self, q0, logp): # Convert adaptive_scale_factor to a jump probability - p_jump = 1. - .5 ** self.scaling + p_jump = 1.0 - 0.5 ** self.scaling rand_array = nr.random(q0.shape) q = np.copy(q0) # Locations where switches occur, according to p_jump - switch_locs = (rand_array < p_jump) + switch_locs = rand_array < p_jump q[switch_locs] = True - q[switch_locs] accept = logp(q) - logp(q0) q_new, accepted = metrop_select(accept, q, q0) self.accepted += accepted - stats = { - 'tune': self.tune, - 'accept': np.exp(accept), - 'p_jump': p_jump, - } + stats = {"tune": self.tune, "accept": np.exp(accept), "p_jump": p_jump} return q_new, [stats] @staticmethod def competence(var): - ''' + """ BinaryMetropolis is only suitable for binary (bool) and Categorical variables with k=1. - ''' - distribution = getattr( - var.distribution, 'parent_dist', var.distribution) + """ + distribution = getattr(var.distribution, "parent_dist", var.distribution) if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types): return Competence.COMPATIBLE elif isinstance(distribution, pm.Categorical) and (distribution.k == 2): @@ -313,9 +325,10 @@ class BinaryGibbsMetropolis(ArrayStep): Optional model for sampling step. Defaults to None (taken from context). """ - name = 'binary_gibbs_metropolis' - def __init__(self, vars, order='random', transit_p=.8, model=None): + name = "binary_gibbs_metropolis" + + def __init__(self, vars, order="random", transit_p=0.8, model=None): model = pm.modelcontext(model) @@ -324,18 +337,17 @@ def __init__(self, vars, order='random', transit_p=.8, model=None): self.dim = sum(v.dsize for v in vars) - if order == 'random': + if order == "random": self.shuffle_dims = True self.order = list(range(self.dim)) else: if sorted(order) != list(range(self.dim)): - raise ValueError('Argument \'order\' has to be a permutation') + raise ValueError("Argument 'order' has to be a permutation") self.shuffle_dims = False self.order = order if not all([v.dtype in pm.discrete_types for v in vars]): - raise ValueError( - 'All variables must be binary for BinaryGibbsMetropolis') + raise ValueError("All variables must be binary for BinaryGibbsMetropolis") super(BinaryGibbsMetropolis, self).__init__(vars, [model.fastlogp]) @@ -353,7 +365,9 @@ def astep(self, q0, logp): if nr.rand() < self.transit_p: curr_val, q[idx] = q[idx], True - q[idx] logp_prop = logp(q) - q[idx], accepted = metrop_select(logp_prop - logp_curr, q[idx], curr_val) + q[idx], accepted = metrop_select( + logp_prop - logp_curr, q[idx], curr_val + ) if accepted: logp_curr = logp_prop @@ -361,12 +375,11 @@ def astep(self, q0, logp): @staticmethod def competence(var): - ''' + """ BinaryMetropolis is only suitable for Bernoulli and Categorical variables with k=2. - ''' - distribution = getattr( - var.distribution, 'parent_dist', var.distribution) + """ + distribution = getattr(var.distribution, "parent_dist", var.distribution) if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types): return Competence.IDEAL elif isinstance(distribution, pm.Categorical) and (distribution.k == 2): @@ -382,9 +395,10 @@ class CategoricalGibbsMetropolis(ArrayStep): which was introduced by Liu in his 1996 technical report "Metropolized Gibbs Sampler: An Improvement". """ - name = 'caregorical_gibbs_metropolis' - def __init__(self, vars, proposal='uniform', order='random', model=None): + name = "caregorical_gibbs_metropolis" + + def __init__(self, vars, proposal="uniform", order="random", model=None): model = pm.modelcontext(model) vars = pm.inputvars(vars) @@ -395,34 +409,37 @@ def __init__(self, vars, proposal='uniform', order='random', model=None): # variable with M categories and y being a 3-D variable with N # categories, we will have dimcats = [(0, M), (1, M), (2, N), (3, N), (4, N)]. for v in vars: - distr = getattr(v.distribution, 'parent_dist', v.distribution) + distr = getattr(v.distribution, "parent_dist", v.distribution) if isinstance(distr, pm.Categorical): k = draw_values([distr.k])[0] elif isinstance(distr, pm.Bernoulli) or (v.dtype in pm.bool_types): k = 2 else: - raise ValueError('All variables must be categorical or binary' + - 'for CategoricalGibbsMetropolis') + raise ValueError( + "All variables must be categorical or binary" + + "for CategoricalGibbsMetropolis" + ) start = len(dimcats) dimcats += [(dim, k) for dim in range(start, start + v.dsize)] - if order == 'random': + if order == "random": self.shuffle_dims = True self.dimcats = dimcats else: if sorted(order) != list(range(len(dimcats))): - raise ValueError('Argument \'order\' has to be a permutation') + raise ValueError("Argument 'order' has to be a permutation") self.shuffle_dims = False self.dimcats = [dimcats[j] for j in order] - if proposal == 'uniform': + if proposal == "uniform": self.astep = self.astep_unif - elif proposal == 'proportional': + elif proposal == "proportional": # Use the optimized "Metropolized Gibbs Sampler" described in Liu96. self.astep = self.astep_prop else: - raise ValueError('Argument \'proposal\' should either be ' + - '\'uniform\' or \'proportional\'') + raise ValueError( + "Argument 'proposal' should either be " + "'uniform' or 'proportional'" + ) super(CategoricalGibbsMetropolis, self).__init__(vars, [model.fastlogp]) @@ -466,8 +483,8 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k): log_probs[candidate_cat] = logp(q) probs = softmax(log_probs) prob_curr, probs[given_cat] = probs[given_cat], 0.0 - probs /= (1.0 - prob_curr) - proposed_cat = nr.choice(candidates, p = probs) + probs /= 1.0 - prob_curr + proposed_cat = nr.choice(candidates, p=probs) accept_ratio = (1.0 - prob_curr) / (1.0 - probs[proposed_cat]) if not np.isfinite(accept_ratio) or nr.uniform() >= accept_ratio: q[dim] = given_cat @@ -477,12 +494,11 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k): @staticmethod def competence(var): - ''' + """ CategoricalGibbsMetropolis is only suitable for Bernoulli and Categorical variables. - ''' - distribution = getattr( - var.distribution, 'parent_dist', var.distribution) + """ + distribution = getattr(var.distribution, "parent_dist", var.distribution) if isinstance(distribution, pm.Categorical): if distribution.k > 2: return Competence.IDEAL @@ -526,19 +542,30 @@ class DEMetropolis(PopulationArrayStepShared): Statistics and Computing `link `__ """ - name = 'DEMetropolis' + + name = "DEMetropolis" default_blocked = True generates_stats = True - stats_dtypes = [{ - 'accept': np.float64, - 'tune': np.bool, - }] - - def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001, - tune=True, tune_interval=100, model=None, mode=None, **kwargs): - warnings.warn('Population based sampling methods such as DEMetropolis are experimental.' \ - ' Use carefully and be extra critical about their results!') + stats_dtypes = [{"accept": np.float64, "tune": np.bool}] + + def __init__( + self, + vars=None, + S=None, + proposal_dist=None, + lamb=None, + scaling=0.001, + tune=True, + tune_interval=100, + model=None, + mode=None, + **kwargs + ): + warnings.warn( + "Population based sampling methods such as DEMetropolis are experimental." + " Use carefully and be extra critical about their results!" + ) model = pm.modelcontext(model) @@ -554,7 +581,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0 else: self.proposal_dist = UniformProposal(S) - self.scaling = np.atleast_1d(scaling).astype('d') + self.scaling = np.atleast_1d(scaling).astype("d") if lamb is None: lamb = 2.38 / np.sqrt(2 * S.size) self.lamb = float(lamb) @@ -572,8 +599,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0 def astep(self, q0): if not self.steps_until_tune and self.tune: # Tune scaling parameter - self.scaling = tune( - self.scaling, self.accepted / float(self.tune_interval)) + self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval)) # Reset counter self.steps_until_tune = self.tune_interval self.accepted = 0 @@ -594,10 +620,7 @@ def astep(self, q0): self.steps_until_tune -= 1 - stats = { - 'tune': self.tune, - 'accept': np.exp(accept), - } + stats = {"tune": self.tune, "accept": np.exp(accept)} return q_new, [stats] @@ -617,14 +640,14 @@ def sample_except(limit, excluded): def softmax(x): e_x = np.exp(x - np.max(x)) - return e_x / np.sum(e_x, axis = 0) + return e_x / np.sum(e_x, axis=0) def delta_logp(logp, vars, shared): [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared) tensor_type = inarray0.type - inarray1 = tensor_type('inarray1') + inarray1 = tensor_type("inarray1") logp1 = pm.CallableTensor(logp0)(inarray1) diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py index 45d71e9098..5c0b437a05 100644 --- a/pymc3/step_methods/sgmcmc.py +++ b/pymc3/step_methods/sgmcmc.py @@ -9,10 +9,12 @@ import theano import numpy as np -__all__ = ['SGFS', 'CSG'] +__all__ = ["SGFS", "CSG"] -EXPERIMENTAL_WARNING = "Warning: Stochastic Gradient based sampling methods are experimental step methods and not yet"\ +EXPERIMENTAL_WARNING = ( + "Warning: Stochastic Gradient based sampling methods are experimental step methods and not yet" " recommended for use in PyMC3!" +) def _value_error(cond, str): @@ -23,17 +25,17 @@ def _value_error(cond, str): def _check_minibatches(minibatch_tensors, minibatches): _value_error( - isinstance(minibatch_tensors, list), - 'minibatch_tensors must be a list.') + isinstance(minibatch_tensors, list), "minibatch_tensors must be a list." + ) - _value_error( - hasattr(minibatches, "__iter__"), 'minibatches must be an iterator.') + _value_error(hasattr(minibatches, "__iter__"), "minibatches must be an iterator.") def prior_dlogp(vars, model, flat_view): """Returns the gradient of the prior on the parameters as a vector of size D x 1""" terms = tt.concatenate( - [theano.grad(var.logpt, var).flatten() for var in vars], axis=0) + [theano.grad(var.logpt, var).flatten() for var in vars], axis=0 + ) dlogp = theano.clone(terms, flat_view.replacements, strict=False) return dlogp @@ -47,21 +49,23 @@ def elemwise_dlogL(vars, model, flat_view): # select one observed random variable obs_var = model.observed_RVs[0] # tensor of shape (batch_size,) - logL = obs_var.logp_elemwiset.sum( - axis=tuple(range(1, obs_var.logp_elemwiset.ndim))) + logL = obs_var.logp_elemwiset.sum(axis=tuple(range(1, obs_var.logp_elemwiset.ndim))) # calculate fisher information terms = [] for var in vars: - output, _ = theano.scan(lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(),\ - sequences=[tt.arange(logL.shape[0])]) + output, _ = theano.scan( + lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(), + sequences=[tt.arange(logL.shape[0])], + ) terms.append(output) dlogL = theano.clone( - tt.concatenate(terms, axis=1), flat_view.replacements, strict=False) + tt.concatenate(terms, axis=1), flat_view.replacements, strict=False + ) return dlogL class BaseStochasticGradient(ArrayStepShared): - R""" + r""" BaseStochasticGradient Object For working with BaseStochasticGradient Object @@ -99,16 +103,18 @@ class BaseStochasticGradient(ArrayStepShared): Returns None it creates class variables which are required for the training fn """ - def __init__(self, - vars=None, - batch_size=None, - total_size=None, - step_size=1.0, - model=None, - random_seed=None, - minibatches=None, - minibatch_tensors=None, - **kwargs): + def __init__( + self, + vars=None, + batch_size=None, + total_size=None, + step_size=1.0, + model=None, + random_seed=None, + minibatches=None, + minibatch_tensors=None, + **kwargs + ): warnings.warn(EXPERIMENTAL_WARNING) model = modelcontext(model) @@ -124,7 +130,8 @@ def __init__(self, self.total_size = total_size _value_error( total_size != None or batch_size != None, - 'total_size and batch_size of training data have to be specified') + "total_size and batch_size of training data have to be specified", + ) self.expected_iter = int(total_size / batch_size) # set random stream @@ -156,12 +163,10 @@ def __init__(self, def is_shared(t): return isinstance(t, theano.compile.sharedvalue.SharedVariable) - tensors = [(t.type() if is_shared(t) else t) - for t in minibatch_tensors] - updates = OrderedDict({ - t: t_ - for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t) - }) + tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors] + updates = OrderedDict( + {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)} + ) self.minibatch_tensors = tensors self.inarray += self.minibatch_tensors self.updates.update(updates) @@ -195,14 +200,14 @@ def astep(self, q0): ------- q """ - if hasattr(self, 'minibatch_tensors'): + if hasattr(self, "minibatch_tensors"): return q0 + self.training_fn(q0, *next(self.minibatches)) else: return q0 + self.training_fn(q0) class SGFS(BaseStochasticGradient): - R""" + r""" StochasticGradientFisherScoring Parameters @@ -220,7 +225,7 @@ class SGFS(BaseStochasticGradient): - Bayesian Posterior Sampling via Stochastic Gradient Fisher Scoring Implements Algorithm 1 from the publication http://people.ee.duke.edu/%7Elcarin/782.pdf """ - name = 'stochastic_gradient_fisher_scoring' + name = "stochastic_gradient_fisher_scoring" def __init__(self, vars=None, B=None, step_size_decay=100, **kwargs): """ @@ -238,9 +243,8 @@ def __init__(self, vars=None, B=None, step_size_decay=100, **kwargs): def _initialize_values(self): # Init avg_I - self.avg_I = theano.shared( - np.zeros((self.q_size, self.q_size)), name='avg_I') - self.t = theano.shared(1, name='t') + self.avg_I = theano.shared(np.zeros((self.q_size, self.q_size)), name="avg_I") + self.t = theano.shared(1, name="t") # 2. Set gamma self.gamma = (self.batch_size + self.total_size) / (self.total_size) @@ -265,12 +269,12 @@ def mk_training_fn(self): avg_gt = gt.mean(axis=0) # 6. Calculate approximate Fisher Score - gt_diff = (gt - avg_gt) + gt_diff = gt - avg_gt - V = (1. / (n - 1)) * tt.dot(gt_diff.T, gt_diff) + V = (1.0 / (n - 1)) * tt.dot(gt_diff.T, gt_diff) # 7. Update moving average - I_t = (1. - 1. / t) * avg_I + (1. / t) * V + I_t = (1.0 - 1.0 / t) * avg_I + (1.0 / t) * V if B is None: # if B is not specified @@ -287,23 +291,23 @@ def mk_training_fn(self): # where B_ch is cholesky decomposition of B # i.e. B = dot(B_ch, B_ch^T) B_ch = tt.slinalg.cholesky(B) - noise_term = tt.dot((2.*B_ch)/tt.sqrt(epsilon), \ - random.normal((q_size,), dtype=theano.config.floatX)) + noise_term = tt.dot( + (2.0 * B_ch) / tt.sqrt(epsilon), + random.normal((q_size,), dtype=theano.config.floatX), + ) # 9. # Inv. Fisher Cov. Matrix - cov_mat = (gamma * I_t * N) + ((4. / epsilon) * B) + cov_mat = (gamma * I_t * N) + ((4.0 / epsilon) * B) inv_cov_mat = tt.nlinalg.matrix_inverse(cov_mat) # Noise Coefficient - noise_coeff = (dlog_prior + (N * avg_gt) + noise_term) + noise_coeff = dlog_prior + (N * avg_gt) + noise_term dq = 2 * tt.dot(inv_cov_mat, noise_coeff) updates.update({avg_I: I_t, t: t + 1}) f = theano.function( - outputs=dq, - inputs=inarray, - updates=updates, - allow_input_downcast=True) + outputs=dq, inputs=inarray, updates=updates, allow_input_downcast=True + ) return f @@ -315,7 +319,7 @@ def competence(var, has_grad): class CSG(BaseStochasticGradient): - R""" + r""" CSG: ConstantStochasticGradient It is an approximate stochastic variational inference algorithm @@ -337,7 +341,7 @@ class CSG(BaseStochasticGradient): - Stochastic Gradient Descent as Approximate Bayesian Inference https://arxiv.org/pdf/1704.04289v1.pdf """ - name = 'constant_stochastic_gradient' + name = "constant_stochastic_gradient" def __init__(self, vars=None, **kwargs): """ @@ -351,9 +355,8 @@ def __init__(self, vars=None, **kwargs): def _initialize_values(self): # Init avg_C: Noise Covariance Moving Average - self.avg_C = theano.shared( - np.zeros((self.q_size, self.q_size)), name='avg_C') - self.t = theano.shared(1, name='t') + self.avg_C = theano.shared(np.zeros((self.q_size, self.q_size)), name="avg_C") + self.t = theano.shared(1, name="t") # Init training fn self.training_fn = self.mk_training_fn() @@ -373,36 +376,36 @@ def mk_training_fn(self): inarray = self.inarray # gradient of log likelihood - gt = -1 * (1. / S) * (self.dlogp_elemwise.sum(axis=0) + - (S / N) * self.dlog_prior) + gt = ( + -1 + * (1.0 / S) + * (self.dlogp_elemwise.sum(axis=0) + (S / N) * self.dlog_prior) + ) # update moving average of Noise Covariance - gt_diff = (self.dlogp_elemwise - self.dlogp_elemwise.mean(axis=0)) - V = (1. / (S - 1)) * theano.dot(gt_diff.T, gt_diff) - C_t = (1. - 1. / t) * avg_C + (1. / t) * V - # BB^T = C + gt_diff = self.dlogp_elemwise - self.dlogp_elemwise.mean(axis=0) + V = (1.0 / (S - 1)) * theano.dot(gt_diff.T, gt_diff) + C_t = (1.0 - 1.0 / t) * avg_C + (1.0 / t) * V + # BB^T = C B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t)) # Optimal Preconditioning Matrix - H = (2. * S / N) * tt.nlinalg.matrix_inverse(C_t) + H = (2.0 * S / N) * tt.nlinalg.matrix_inverse(C_t) # step value on the log likelihood gradient preconditioned with H - step = -1 * theano.dot(H, gt.dimshuffle([0, 'x'])) + step = -1 * theano.dot(H, gt.dimshuffle([0, "x"])) # sample gaussian noise dW - dW = random.normal( - (q_size, 1), dtype=theano.config.floatX, avg=0.0, std=1.0) + dW = random.normal((q_size, 1), dtype=theano.config.floatX, avg=0.0, std=1.0) # noise term is inversely proportional to batch size - noise_term = (1. / np.sqrt(S)) * theano.dot(H, theano.dot(B, dW)) + noise_term = (1.0 / np.sqrt(S)) * theano.dot(H, theano.dot(B, dW)) # step + noise term dq = (step + noise_term).flatten() - # update time and avg_C + # update time and avg_C updates.update({avg_C: C_t, t: t + 1}) f = theano.function( - outputs=dq, - inputs=inarray, - updates=updates, - allow_input_downcast=True) + outputs=dq, inputs=inarray, updates=updates, allow_input_downcast=True + ) return f diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py index a68b18fe05..0267d8e9c7 100644 --- a/pymc3/step_methods/slicer.py +++ b/pymc3/step_methods/slicer.py @@ -8,9 +8,9 @@ from ..theanof import inputvars from ..vartypes import continuous_types -__all__ = ['Slice'] +__all__ = ["Slice"] -LOOP_ERR_MSG = 'max slicer iters %d exceeded' +LOOP_ERR_MSG = "max slicer iters %d exceeded" class Slice(ArrayStep): @@ -29,15 +29,17 @@ class Slice(ArrayStep): Optional model for sampling step. Defaults to None (taken from context). """ - name = 'slice' + + name = "slice" default_blocked = False - def __init__(self, vars=None, w=1., tune=True, model=None, - iter_limit=np.inf, **kwargs): + def __init__( + self, vars=None, w=1.0, tune=True, model=None, iter_limit=np.inf, **kwargs + ): self.model = modelcontext(model) self.w = w self.tune = tune - self.n_tunes = 0. + self.n_tunes = 0.0 self.iter_limit = iter_limit if vars is None: @@ -58,13 +60,13 @@ def astep(self, q0, logp): qr[i] = q[i] + self.w[i] # Stepping out procedure cnt = 0 - while(y <= logp(ql)): # changed lt to leq for locally uniform posteriors + while y <= logp(ql): # changed lt to leq for locally uniform posteriors ql[i] -= self.w[i] cnt += 1 if cnt > self.iter_limit: raise RuntimeError(LOOP_ERR_MSG % self.iter_limit) cnt = 0 - while(y <= logp(qr)): + while y <= logp(qr): qr[i] += self.w[i] cnt += 1 if cnt > self.iter_limit: @@ -72,7 +74,9 @@ def astep(self, q0, logp): cnt = 0 q[i] = nr.uniform(ql[i], qr[i]) - while logp(q) < y: # Changed leq to lt, to accomodate for locally flat posteriors + while ( + logp(q) < y + ): # Changed leq to lt, to accomodate for locally flat posteriors # Sample uniformly from slice if q[i] > q0[i]: qr[i] = q[i] @@ -83,11 +87,16 @@ def astep(self, q0, logp): if cnt > self.iter_limit: raise RuntimeError(LOOP_ERR_MSG % self.iter_limit) - if self.tune: # I was under impression from MacKays lectures that slice width can be tuned without + if ( + self.tune + ): # I was under impression from MacKays lectures that slice width can be tuned without # breaking markovianness. Can we do it regardless of self.tune?(@madanh) - self.w[i] = self.w[i] * (self.n_tunes / (self.n_tunes + 1)) +\ - (qr[i] - ql[i]) / (self.n_tunes + 1) # same as before - # unobvious and important: return qr and ql to the same point + self.w[i] = self.w[i] * (self.n_tunes / (self.n_tunes + 1)) + ( + qr[i] - ql[i] + ) / ( + self.n_tunes + 1 + ) # same as before + # unobvious and important: return qr and ql to the same point qr[i] = q[i] ql[i] = q[i] if self.tune: @@ -101,4 +110,3 @@ def competence(var, has_grad): return Competence.PREFERRED return Competence.COMPATIBLE return Competence.INCOMPATIBLE - \ No newline at end of file diff --git a/pymc3/step_methods/smc.py b/pymc3/step_methods/smc.py index 548bf605cb..bf3a560dca 100644 --- a/pymc3/step_methods/smc.py +++ b/pymc3/step_methods/smc.py @@ -15,12 +15,12 @@ from ..backends.base import MultiTrace -__all__ = ['SMC', 'sample_smc'] +__all__ = ["SMC", "sample_smc"] -proposal_dists = {'MultivariateNormal': MultivariateNormalProposal} +proposal_dists = {"MultivariateNormal": MultivariateNormalProposal} -class SMC(): +class SMC: """ Sequential Monte Carlo step @@ -59,8 +59,15 @@ class SMC(): %282007%29133:7%28816%29>`__ """ - def __init__(self, n_steps=5, scaling=1., p_acc_rate=0.01, tune=True, - proposal_name='MultivariateNormal', threshold=0.5): + def __init__( + self, + n_steps=5, + scaling=1.0, + p_acc_rate=0.01, + tune=True, + proposal_name="MultivariateNormal", + threshold=0.5, + ): self.n_steps = n_steps self.scaling = scaling @@ -88,7 +95,9 @@ def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed random_seed : int random seed """ - warnings.warn("Warning: SMC is experimental, hopefully it will be ready for PyMC 3.6") + warnings.warn( + "Warning: SMC is experimental, hopefully it will be ready for PyMC 3.6" + ) model = modelcontext(model) if random_seed != -1: @@ -99,19 +108,23 @@ def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed acc_rate = 1 model.marginal_likelihood = 1 variables = model.vars - discrete = np.concatenate([[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in variables]) + discrete = np.concatenate( + [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in variables] + ) any_discrete = discrete.any() all_discrete = discrete.all() shared = make_shared_replacements(variables, model) prior_logp = logp_forw([model.varlogpt], variables, shared) likelihood_logp = logp_forw([model.datalogpt], variables, shared) - pm._log.info('Sample initial stage: ...') + pm._log.info("Sample initial stage: ...") posterior, var_info = _initial_population(draws, model, variables) while beta < 1: # compute plausibility weights (measure fitness) - likelihoods = np.array([likelihood_logp(sample) for sample in posterior]).squeeze() + likelihoods = np.array( + [likelihood_logp(sample) for sample in posterior] + ).squeeze() beta, old_beta, weights, sj = _calc_beta(beta, likelihoods, step.threshold) model.marginal_likelihood *= sj # resample based on plausibility weights (selection) @@ -127,15 +140,18 @@ def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed # acceptance rate if step.tune and stage > 0: if acc_rate == 0: - acc_rate = 1. / step.n_steps + acc_rate = 1.0 / step.n_steps step.scaling = _tune(acc_rate) step.n_steps = 1 + int(np.log(step.p_acc_rate) / np.log(1 - acc_rate)) - pm._log.info('Stage: {:d} Beta: {:f} Steps: {:d} Acc: {:f}'.format(stage, beta, - step.n_steps, acc_rate)) + pm._log.info( + "Stage: {:d} Beta: {:f} Steps: {:d} Acc: {:f}".format( + stage, beta, step.n_steps, acc_rate + ) + ) # Apply Metropolis kernel (mutation) - proposed = 0. - accepted = 0. + proposed = 0.0 + accepted = 0.0 priors = np.array([prior_logp(sample) for sample in posterior]).squeeze() tempered_post = priors + likelihoods * beta for draw in tqdm(range(draws), disable=not progressbar): @@ -147,23 +163,25 @@ def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed if any_discrete: if all_discrete: - delta = np.round(delta, 0).astype('int64') - q_old = q_old.astype('int64') - q_new = (q_old + delta).astype('int64') + delta = np.round(delta, 0).astype("int64") + q_old = q_old.astype("int64") + q_new = (q_old + delta).astype("int64") else: delta[discrete] = np.round(delta[discrete], 0) - q_new = (q_old + delta) + q_new = q_old + delta else: q_new = floatX(q_old + delta) new_tempered_post = prior_logp(q_new) + likelihood_logp(q_new)[0] * beta - q_old, accept = metrop_select(new_tempered_post - old_tempered_post, q_new, q_old) + q_old, accept = metrop_select( + new_tempered_post - old_tempered_post, q_new, q_old + ) if accept: accepted += accept posterior[draw] = q_old old_tempered_post = new_tempered_post - proposed += 1. + proposed += 1.0 acc_rate = accepted / proposed stage += 1 @@ -219,11 +237,11 @@ def _calc_beta(beta, likelihoods, threshold=0.5): Partial marginal likelihood """ low_beta = old_beta = beta - up_beta = 2. + up_beta = 2.0 rN = int(len(likelihoods) * threshold) while up_beta - low_beta > 1e-6: - new_beta = (low_beta + up_beta) / 2. + new_beta = (low_beta + up_beta) / 2.0 weights_un = np.exp((new_beta - old_beta) * (likelihoods - likelihoods.max())) weights = weights_un / np.sum(weights_un) ESS = int(1 / np.sum(weights ** 2)) @@ -245,7 +263,9 @@ def _calc_covariance(posterior_array, weights): """ Calculate trace covariance matrix based on importance weights. """ - cov = np.cov(np.squeeze(posterior_array), aweights=weights.ravel(), bias=False, rowvar=0) + cov = np.cov( + np.squeeze(posterior_array), aweights=weights.ravel(), bias=False, rowvar=0 + ) if np.isnan(cov).any() or np.isinf(cov).any(): raise ValueError('Sample covariances not valid! Likely "chains" is too small!') return np.atleast_2d(cov) @@ -265,8 +285,8 @@ def _tune(acc_rate): scaling: float """ # a and b after Muto & Beck 2008 . - a = 1. / 9 - b = 8. / 9 + a = 1.0 / 9 + b = 8.0 / 9 return (a + b * acc_rate) ** 2 @@ -285,7 +305,7 @@ def _posterior_to_trace(posterior, model, var_info): size = 0 for var in varnames: shape, new_size = var_info[var] - value.append(posterior[i][size:size+new_size].reshape(shape)) + value.append(posterior[i][size : size + new_size].reshape(shape)) size += new_size strace.record({k: v for k, v in zip(varnames, value)}) return MultiTrace([strace]) diff --git a/pymc3/step_methods/step_sizes.py b/pymc3/step_methods/step_sizes.py index 6cc3de5a87..502f46e160 100644 --- a/pymc3/step_methods/step_sizes.py +++ b/pymc3/step_methods/step_sizes.py @@ -9,7 +9,7 @@ def __init__(self, initial_step, target, gamma, k, t0): self._log_step = np.log(initial_step) self._log_bar = self._log_step self._target = target - self._hbar = 0. + self._hbar = 0.0 self._k = k self._t0 = t0 self._count = 1 @@ -29,8 +29,8 @@ def update(self, accept_stat, tune): return count, k, t0 = self._count, self._k, self._t0 - w = 1. / (count + t0) - self._hbar = ((1 - w) * self._hbar + w * (self._target - accept_stat)) + w = 1.0 / (count + t0) + self._hbar = (1 - w) * self._hbar + w * (self._target - accept_stat) self._log_step = self._mu - self._hbar * np.sqrt(count) / self._gamma mk = count ** -k @@ -39,8 +39,8 @@ def update(self, accept_stat, tune): def stats(self): return { - 'step_size': np.exp(self._log_step), - 'step_size_bar': np.exp(self._log_bar), + "step_size": np.exp(self._log_step), + "step_size_bar": np.exp(self._log_bar), } def warnings(self): @@ -53,13 +53,15 @@ def warnings(self): n_good, n_bad = mean_accept * n_bound, (1 - mean_accept) * n_bound lower, upper = stats.beta(n_good + 1, n_bad + 1).interval(0.95) if target_accept < lower or target_accept > upper: - msg = ('The acceptance probability does not match the target. It ' - 'is %s, but should be close to %s. Try to increase the ' - 'number of tuning steps.' - % (mean_accept, target_accept)) - info = {'target': target_accept, 'actual': mean_accept} + msg = ( + "The acceptance probability does not match the target. It " + "is %s, but should be close to %s. Try to increase the " + "number of tuning steps." % (mean_accept, target_accept) + ) + info = {"target": target_accept, "actual": mean_accept} warning = SamplerWarning( - WarningType.BAD_ACCEPTANCE, msg, 'warn', None, None, info) + WarningType.BAD_ACCEPTANCE, msg, "warn", None, None, info + ) return [warning] else: return [] diff --git a/pymc3/tests/backend_fixtures.py b/pymc3/tests/backend_fixtures.py index 6da3d33539..87e4137650 100644 --- a/pymc3/tests/backend_fixtures.py +++ b/pymc3/tests/backend_fixtures.py @@ -33,7 +33,7 @@ def setup_method(self): with self.model: self.strace = self.backend(self.name) self.draws, self.chain = 3, 0 - if not hasattr(self, 'sampler_vars'): + if not hasattr(self, "sampler_vars"): self.sampler_vars = None if self.sampler_vars is not None: assert self.strace.supports_sampler_stats @@ -46,11 +46,11 @@ def test_append_invalid(self): with pytest.raises(ValueError): self.strace.setup(self.draws, self.chain) with pytest.raises(ValueError): - vars = self.sampler_vars + [{'a': np.bool}] + vars = self.sampler_vars + [{"a": np.bool}] self.strace.setup(self.draws, self.chain, vars) else: with pytest.raises((ValueError, TypeError)): - self.strace.setup(self.draws, self.chain, [{'a': np.bool}]) + self.strace.setup(self.draws, self.chain, [{"a": np.bool}]) def test_append(self): if self.sampler_vars is None: @@ -82,20 +82,21 @@ class StatsTestCase(object): - name - shape """ + def setup_method(self): self.test_point, self.model, _ = models.beta_bernoulli(self.shape) self.draws, self.chain = 3, 0 def test_bad_dtype(self): - bad_vars = [{'a': np.float64}, {'a': np.bool}] - good_vars = [{'a': np.float64}, {'a': np.float64}] + bad_vars = [{"a": np.float64}, {"a": np.bool}] + good_vars = [{"a": np.float64}, {"a": np.float64}] with self.model: strace = self.backend(self.name) with pytest.raises((ValueError, TypeError)): strace.setup(self.draws, self.chain, bad_vars) strace.setup(self.draws, self.chain, good_vars) if strace.supports_sampler_stats: - assert strace.stat_names == set(['a']) + assert strace.stat_names == set(["a"]) else: with pytest.raises((ValueError, TypeError)): strace.setup(self.draws, self.chain, good_vars) @@ -125,6 +126,7 @@ class ModelBackendSampledTestCase(object): Children may define - sampler_vars """ + @classmethod def setup_class(cls): cls.test_point, cls.model, _ = models.beta_bernoulli(cls.shape) @@ -132,7 +134,7 @@ def setup_class(cls): strace0 = cls.backend(cls.name) strace1 = cls.backend(cls.name) - if not hasattr(cls, 'sampler_vars'): + if not hasattr(cls, "sampler_vars"): cls.sampler_vars = None cls.draws = 5 @@ -144,16 +146,15 @@ def setup_class(cls): strace1.setup(cls.draws, chain=1) varnames = list(cls.test_point.keys()) - shapes = {varname: value.shape - for varname, value in cls.test_point.items()} - dtypes = {varname: value.dtype - for varname, value in cls.test_point.items()} + shapes = {varname: value.shape for varname, value in cls.test_point.items()} + dtypes = {varname: value.dtype for varname, value in cls.test_point.items()} cls.expected = {0: {}, 1: {}} for varname in varnames: mcmc_shape = (cls.draws,) + shapes[varname] - values = np.arange(cls.draws * np.prod(shapes[varname]), - dtype=dtypes[varname]) + values = np.arange( + cls.draws * np.prod(shapes[varname]), dtype=dtypes[varname] + ) cls.expected[0][varname] = values.reshape(mcmc_shape) cls.expected[1][varname] = values.reshape(mcmc_shape) * 100 @@ -169,17 +170,22 @@ def setup_class(cls): else: stats[key] = np.arange(cls.draws, dtype=dtype) - for idx in range(cls.draws): - point0 = {varname: cls.expected[0][varname][idx, ...] - for varname in varnames} - point1 = {varname: cls.expected[1][varname][idx, ...] - for varname in varnames} + point0 = { + varname: cls.expected[0][varname][idx, ...] for varname in varnames + } + point1 = { + varname: cls.expected[1][varname][idx, ...] for varname in varnames + } if cls.sampler_vars is not None: - stats1 = [dict((key, val[idx]) for key, val in stats.items()) - for stats in cls.expected_stats[0]] - stats2 = [dict((key, val[idx]) for key, val in stats.items()) - for stats in cls.expected_stats[1]] + stats1 = [ + dict((key, val[idx]) for key, val in stats.items()) + for stats in cls.expected_stats[0] + ] + stats2 = [ + dict((key, val[idx]) for key, val in stats.items()) + for stats in cls.expected_stats[1] + ] strace0.record(point=point0, sampler_stats=stats1) strace1.record(point=point1, sampler_stats=stats2) else: @@ -223,27 +229,37 @@ class SamplingTestCase(ModelBackendSetupTestCase): """ def record_point(self, val): - point = {varname: np.tile(val, value.shape) - for varname, value in self.test_point.items()} + point = { + varname: np.tile(val, value.shape) + for varname, value in self.test_point.items() + } if self.sampler_vars is not None: - stats = [dict((key, dtype(val)) for key, dtype in vars.items()) - for vars in self.sampler_vars] + stats = [ + dict((key, dtype(val)) for key, dtype in vars.items()) + for vars in self.sampler_vars + ] self.strace.record(point=point, sampler_stats=stats) else: self.strace.record(point=point) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_standard_close(self): for idx in range(self.draws): self.record_point(idx) self.strace.close() for varname in self.test_point.keys(): - npt.assert_equal(self.strace.get_values(varname)[0, ...], - np.zeros(self.strace.var_shapes[varname])) + npt.assert_equal( + self.strace.get_values(varname)[0, ...], + np.zeros(self.strace.var_shapes[varname]), + ) last_idx = self.draws - 1 - npt.assert_equal(self.strace.get_values(varname)[last_idx, ...], - np.tile(last_idx, self.strace.var_shapes[varname])) + npt.assert_equal( + self.strace.get_values(varname)[last_idx, ...], + np.tile(last_idx, self.strace.var_shapes[varname]), + ) if self.sampler_vars: for varname in self.strace.stat_names: vals = self.strace.get_sampler_stats(varname) @@ -272,41 +288,56 @@ class SelectionTestCase(ModelBackendSampledTestCase): - shape """ - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_get_values_default(self): for varname in self.test_point.keys(): - expected = np.concatenate([self.expected[chain][varname] - for chain in [0, 1]]) + expected = np.concatenate( + [self.expected[chain][varname] for chain in [0, 1]] + ) result = self.mtrace.get_values(varname) npt.assert_equal(result, expected) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_get_values_nocombine_burn_keyword(self): burn = 2 for varname in self.test_point.keys(): - expected = [self.expected[0][varname][burn:], - self.expected[1][varname][burn:]] + expected = [ + self.expected[0][varname][burn:], + self.expected[1][varname][burn:], + ] result = self.mtrace.get_values(varname, burn=burn, combine=False) npt.assert_equal(result, expected) def test_len(self): assert len(self.mtrace) == self.draws - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_dtypes(self): for varname in self.test_point.keys(): - assert self.expected[0][varname].dtype == \ - self.mtrace.get_values(varname, chains=0).dtype + assert ( + self.expected[0][varname].dtype + == self.mtrace.get_values(varname, chains=0).dtype + ) for statname in self.mtrace.stat_names: - assert self.stat_dtypes[statname] == \ - self.mtrace.get_sampler_stats(statname, chains=0).dtype + assert ( + self.stat_dtypes[statname] + == self.mtrace.get_sampler_stats(statname, chains=0).dtype + ) def test_get_values_nocombine_thin_keyword(self): thin = 2 for varname in self.test_point.keys(): - expected = [self.expected[0][varname][::thin], - self.expected[1][varname][::thin]] + expected = [ + self.expected[0][varname][::thin], + self.expected[1][varname][::thin], + ] result = self.mtrace.get_values(varname, thin=thin, combine=False) npt.assert_equal(result, expected) @@ -320,13 +351,18 @@ def test_get_point(self): def test_get_slice(self): expected = [] for chain in [0, 1]: - expected.append({varname: self.expected[chain][varname][2:] - for varname in self.mtrace.varnames}) + expected.append( + { + varname: self.expected[chain][varname][2:] + for varname in self.mtrace.varnames + } + ) result = self.mtrace[2:] for chain in [0, 1]: for varname in self.test_point.keys(): - npt.assert_equal(result.get_values(varname, chains=[chain]), - expected[chain][varname]) + npt.assert_equal( + result.get_values(varname, chains=[chain]), expected[chain][varname] + ) def test_get_slice_step(self): result = self.mtrace[:] @@ -335,9 +371,8 @@ def test_get_slice_step(self): result = self.mtrace[::2] assert len(result) == self.draws // 2 - def test_get_slice_neg_step(self): - if hasattr(self, 'skip_test_get_slice_neg_step'): + if hasattr(self, "skip_test_get_slice_neg_step"): return result = self.mtrace[::-1] @@ -346,17 +381,21 @@ def test_get_slice_neg_step(self): result = self.mtrace[::-2] assert len(result) == self.draws // 2 - def test_get_neg_slice(self): expected = [] for chain in [0, 1]: - expected.append({varname: self.expected[chain][varname][-2:] - for varname in self.mtrace.varnames}) + expected.append( + { + varname: self.expected[chain][varname][-2:] + for varname in self.mtrace.varnames + } + ) result = self.mtrace[-2:] for chain in [0, 1]: for varname in self.test_point.keys(): - npt.assert_equal(result.get_values(varname, chains=[chain]), - expected[chain][varname]) + npt.assert_equal( + result.get_values(varname, chains=[chain]), expected[chain][varname] + ) def test_get_values_one_chain(self): for varname in self.test_point.keys(): @@ -367,8 +406,7 @@ def test_get_values_one_chain(self): def test_get_values_nocombine_chains_reversed(self): for varname in self.test_point.keys(): expected = [self.expected[1][varname], self.expected[0][varname]] - result = self.mtrace.get_values(varname, chains=[1, 0], - combine=False) + result = self.mtrace.get_values(varname, chains=[1, 0], combine=False) npt.assert_equal(result, expected) def test_nchains(self): @@ -376,51 +414,54 @@ def test_nchains(self): def test_get_values_one_chain_int_arg(self): for varname in self.test_point.keys(): - npt.assert_equal(self.mtrace.get_values(varname, chains=[0]), - self.mtrace.get_values(varname, chains=0)) + npt.assert_equal( + self.mtrace.get_values(varname, chains=[0]), + self.mtrace.get_values(varname, chains=0), + ) def test_get_values_combine(self): for varname in self.test_point.keys(): - expected = np.concatenate([self.expected[chain][varname] - for chain in [0, 1]]) + expected = np.concatenate( + [self.expected[chain][varname] for chain in [0, 1]] + ) result = self.mtrace.get_values(varname, combine=True) npt.assert_equal(result, expected) def test_get_values_combine_burn_arg(self): burn = 2 for varname in self.test_point.keys(): - expected = np.concatenate([self.expected[chain][varname][burn:] - for chain in [0, 1]]) + expected = np.concatenate( + [self.expected[chain][varname][burn:] for chain in [0, 1]] + ) result = self.mtrace.get_values(varname, combine=True, burn=burn) npt.assert_equal(result, expected) def test_get_values_combine_thin_arg(self): thin = 2 for varname in self.test_point.keys(): - expected = np.concatenate([self.expected[chain][varname][::thin] - for chain in [0, 1]]) + expected = np.concatenate( + [self.expected[chain][varname][::thin] for chain in [0, 1]] + ) result = self.mtrace.get_values(varname, combine=True, thin=thin) npt.assert_equal(result, expected) def test_getitem_equivalence(self): mtrace = self.mtrace for varname in self.test_point.keys(): - npt.assert_equal(mtrace[varname], - mtrace.get_values(varname, combine=True)) - npt.assert_equal(mtrace[varname, 2:], - mtrace.get_values(varname, burn=2, - combine=True)) - npt.assert_equal(mtrace[varname, 2::2], - mtrace.get_values(varname, burn=2, thin=2, - combine=True)) + npt.assert_equal(mtrace[varname], mtrace.get_values(varname, combine=True)) + npt.assert_equal( + mtrace[varname, 2:], mtrace.get_values(varname, burn=2, combine=True) + ) + npt.assert_equal( + mtrace[varname, 2::2], + mtrace.get_values(varname, burn=2, thin=2, combine=True), + ) def test_selection_method_equivalence(self): varname = self.mtrace.varnames[0] mtrace = self.mtrace - npt.assert_equal(mtrace.get_values(varname), - mtrace[varname]) - npt.assert_equal(mtrace[varname], - mtrace.__getattr__(varname)) + npt.assert_equal(mtrace.get_values(varname), mtrace[varname]) + npt.assert_equal(mtrace[varname], mtrace.__getattr__(varname)) class DumpLoadTestCase(ModelBackendSampledTestCase): @@ -433,6 +474,7 @@ class DumpLoadTestCase(ModelBackendSampledTestCase): - name - shape """ + @classmethod def setup_class(cls): super(DumpLoadTestCase, cls).setup_class() @@ -475,6 +517,7 @@ class BackendEqualityTestCase(ModelBackendSampledTestCase): - name1 - shape """ + @classmethod def setup_class(cls): cls.backend = cls.backend0 @@ -497,18 +540,20 @@ def test_chain_length(self): assert self.mtrace0.nchains == self.mtrace1.nchains assert len(self.mtrace0) == len(self.mtrace1) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_dtype(self): for varname in self.test_point.keys(): - assert self.mtrace0.get_values(varname, chains=0).dtype == \ - self.mtrace1.get_values(varname, chains=0).dtype + assert ( + self.mtrace0.get_values(varname, chains=0).dtype + == self.mtrace1.get_values(varname, chains=0).dtype + ) def test_number_of_draws(self): for varname in self.test_point.keys(): - values0 = self.mtrace0.get_values(varname, combine=False, - squeeze=False) - values1 = self.mtrace1.get_values(varname, combine=False, - squeeze=False) + values0 = self.mtrace0.get_values(varname, combine=False, squeeze=False) + values1 = self.mtrace1.get_values(varname, combine=False, squeeze=False) assert values0[0].shape[0] == self.draws assert values1[0].shape[0] == self.draws @@ -519,59 +564,61 @@ def test_get_item(self): def test_get_values(self): for varname in self.test_point.keys(): for cf in [False, True]: - npt.assert_equal(self.mtrace0.get_values(varname, combine=cf), - self.mtrace1.get_values(varname, combine=cf)) + npt.assert_equal( + self.mtrace0.get_values(varname, combine=cf), + self.mtrace1.get_values(varname, combine=cf), + ) def test_get_values_no_squeeze(self): for varname in self.test_point.keys(): - npt.assert_equal(self.mtrace0.get_values(varname, combine=False, - squeeze=False), - self.mtrace1.get_values(varname, combine=False, - squeeze=False)) + npt.assert_equal( + self.mtrace0.get_values(varname, combine=False, squeeze=False), + self.mtrace1.get_values(varname, combine=False, squeeze=False), + ) def test_get_values_combine_and_no_squeeze(self): for varname in self.test_point.keys(): - npt.assert_equal(self.mtrace0.get_values(varname, combine=True, - squeeze=False), - self.mtrace1.get_values(varname, combine=True, - squeeze=False)) + npt.assert_equal( + self.mtrace0.get_values(varname, combine=True, squeeze=False), + self.mtrace1.get_values(varname, combine=True, squeeze=False), + ) def test_get_values_with_burn(self): for varname in self.test_point.keys(): for cf in [False, True]: - npt.assert_equal(self.mtrace0.get_values(varname, combine=cf, - burn=3), - self.mtrace1.get_values(varname, combine=cf, - burn=3)) + npt.assert_equal( + self.mtrace0.get_values(varname, combine=cf, burn=3), + self.mtrace1.get_values(varname, combine=cf, burn=3), + ) # Burn to one value. - npt.assert_equal(self.mtrace0.get_values(varname, combine=cf, - burn=self.draws - 1), - self.mtrace1.get_values(varname, combine=cf, - burn=self.draws - 1)) + npt.assert_equal( + self.mtrace0.get_values(varname, combine=cf, burn=self.draws - 1), + self.mtrace1.get_values(varname, combine=cf, burn=self.draws - 1), + ) def test_get_values_with_thin(self): for varname in self.test_point.keys(): for cf in [False, True]: - npt.assert_equal(self.mtrace0.get_values(varname, combine=cf, - thin=2), - self.mtrace1.get_values(varname, combine=cf, - thin=2)) + npt.assert_equal( + self.mtrace0.get_values(varname, combine=cf, thin=2), + self.mtrace1.get_values(varname, combine=cf, thin=2), + ) def test_get_values_with_burn_and_thin(self): for varname in self.test_point.keys(): for cf in [False, True]: - npt.assert_equal(self.mtrace0.get_values(varname, combine=cf, - burn=2, thin=2), - self.mtrace1.get_values(varname, combine=cf, - burn=2, thin=2)) + npt.assert_equal( + self.mtrace0.get_values(varname, combine=cf, burn=2, thin=2), + self.mtrace1.get_values(varname, combine=cf, burn=2, thin=2), + ) def test_get_values_with_chains_arg(self): for varname in self.test_point.keys(): for cf in [False, True]: - npt.assert_equal(self.mtrace0.get_values(varname, chains=[0], - combine=cf), - self.mtrace1.get_values(varname, chains=[0], - combine=cf)) + npt.assert_equal( + self.mtrace0.get_values(varname, chains=[0], combine=cf), + self.mtrace1.get_values(varname, chains=[0], combine=cf), + ) def test_get_point(self): npoint, spoint = self.mtrace0[4], self.mtrace1[4] diff --git a/pymc3/tests/checks.py b/pymc3/tests/checks.py index 292cc5fe7f..f8f2dae232 100644 --- a/pymc3/tests/checks.py +++ b/pymc3/tests/checks.py @@ -2,12 +2,12 @@ def close_to(x, v, bound, name="value"): - assert np.all(np.logical_or( - np.abs(x - v) < bound, - x == v)), name + " out of bounds : " + repr(x) + ", " + repr(v) + ", " + repr(bound) + assert np.all(np.logical_or(np.abs(x - v) < bound, x == v)), ( + name + " out of bounds : " + repr(x) + ", " + repr(v) + ", " + repr(bound) + ) -def close_to_logical(x, v, bound, name="value"): - assert np.all(np.logical_or( - np.abs(np.bitwise_xor(x, v)) < bound, - x == v)), name + " out of bounds : " + repr(x) + ", " + repr(v) + ", " + repr(bound) +def close_to_logical(x, v, bound, name="value"): + assert np.all(np.logical_or(np.abs(np.bitwise_xor(x, v)) < bound, x == v)), ( + name + " out of bounds : " + repr(x) + ", " + repr(v) + ", " + repr(bound) + ) diff --git a/pymc3/tests/conftest.py b/pymc3/tests/conftest.py index 481fcba133..d628ce58c7 100644 --- a/pymc3/tests/conftest.py +++ b/pymc3/tests/conftest.py @@ -6,31 +6,29 @@ @pytest.fixture(scope="function", autouse=True) def theano_config(): - config = theano.configparser.change_flags(compute_test_value='raise') + config = theano.configparser.change_flags(compute_test_value="raise") with config: yield -@pytest.fixture(scope='function', autouse=True) +@pytest.fixture(scope="function", autouse=True) def exception_verbosity(): - config = theano.configparser.change_flags( - exception_verbosity='high') + config = theano.configparser.change_flags(exception_verbosity="high") with config: yield -@pytest.fixture(scope='function', autouse=False) +@pytest.fixture(scope="function", autouse=False) def strict_float32(): - if theano.config.floatX == 'float32': - config = theano.configparser.change_flags( - warn_float64='raise') + if theano.config.floatX == "float32": + config = theano.configparser.change_flags(warn_float64="raise") with config: yield else: yield -@pytest.fixture('function', autouse=False) +@pytest.fixture("function", autouse=False) def seeded_test(): # TODO: use this instead of SeededTest np.random.seed(42) diff --git a/pymc3/tests/helpers.py b/pymc3/tests/helpers.py index 621d9844c0..0a30e8a58b 100644 --- a/pymc3/tests/helpers.py +++ b/pymc3/tests/helpers.py @@ -51,7 +51,7 @@ def matches(self, **kwargs): class Matcher(object): - _partial_matches = ('msg', 'message') + _partial_matches = ("msg", "message") def matches(self, d, **kwargs): """ @@ -77,7 +77,7 @@ def match_value(self, k, dv, v): if type(v) != type(dv): result = False elif type(dv) is not str or k not in self._partial_matches: - result = (v == dv) + result = v == dv else: result = dv.find(v) >= 0 return result diff --git a/pymc3/tests/models.py b/pymc3/tests/models.py index 117d847c3c..d30a93b182 100644 --- a/pymc3/tests/models.py +++ b/pymc3/tests/models.py @@ -12,16 +12,16 @@ def simple_model(): mu = -2.1 tau = 1.3 with Model() as model: - Normal('x', mu, tau=tau, shape=2, testval=tt.ones(2) * .1) + Normal("x", mu, tau=tau, shape=2, testval=tt.ones(2) * 0.1) - return model.test_point, model, (mu, tau ** -.5) + return model.test_point, model, (mu, tau ** -0.5) def simple_categorical(): p = floatX_array([0.1, 0.2, 0.3, 0.4]) v = floatX_array([0.0, 1.0, 2.0, 3.0]) with Model() as model: - Categorical('x', p, shape=3, testval=[1, 2, 3]) + Categorical("x", p, shape=3, testval=[1, 2, 3]) mu = np.dot(p, v) var = np.dot(p, (v - mu) ** 2) @@ -32,9 +32,9 @@ def multidimensional_model(): mu = -2.1 tau = 1.3 with Model() as model: - Normal('x', mu, tau=tau, shape=(3, 2), testval=.1 * tt.ones((3, 2))) + Normal("x", mu, tau=tau, shape=(3, 2), testval=0.1 * tt.ones((3, 2))) - return model.test_point, model, (mu, tau ** -.5) + return model.test_point, model, (mu, tau ** -0.5) def simple_arbitrary_det(): @@ -45,27 +45,27 @@ def arbitrary_det(value): return value with Model() as model: - a = Normal('a') + a = Normal("a") b = arbitrary_det(a) - Normal('obs', mu=b.astype('float64'), observed=floatX_array([1, 3, 5])) + Normal("obs", mu=b.astype("float64"), observed=floatX_array([1, 3, 5])) return model.test_point, model def simple_init(): start, model, moments = simple_model() - step = Metropolis(model.vars, np.diag([1.]), model=model) + step = Metropolis(model.vars, np.diag([1.0]), model=model) return model, start, step, moments def simple_2model(): mu = -2.1 tau = 1.3 - p = .4 + p = 0.4 with Model() as model: - x = pm.Normal('x', mu, tau=tau, testval=.1) - pm.Deterministic('logx', tt.log(x)) - pm.Bernoulli('y', p) + x = pm.Normal("x", mu, tau=tau, testval=0.1) + pm.Deterministic("logx", tt.log(x)) + pm.Bernoulli("y", p) return model.test_point, model @@ -73,22 +73,24 @@ def simple_2model_continuous(): mu = -2.1 tau = 1.3 with Model() as model: - x = pm.Normal('x', mu, tau=tau, testval=.1) - pm.Deterministic('logx', tt.log(x)) - pm.Beta('y', alpha=1, beta=1, shape=2) + x = pm.Normal("x", mu, tau=tau, testval=0.1) + pm.Deterministic("logx", tt.log(x)) + pm.Beta("y", alpha=1, beta=1, shape=2) return model.test_point, model def mv_simple(): - mu = floatX_array([-.1, .5, 1.1]) - p = floatX_array([ - [2., 0, 0], - [.05, .1, 0], - [1., -0.05, 5.5]]) + mu = floatX_array([-0.1, 0.5, 1.1]) + p = floatX_array([[2.0, 0, 0], [0.05, 0.1, 0], [1.0, -0.05, 5.5]]) tau = np.dot(p, p.T) with pm.Model() as model: - pm.MvNormal('x', tt.constant(mu), tau=tt.constant(tau), - shape=3, testval=floatX_array([.1, 1., .8])) + pm.MvNormal( + "x", + tt.constant(mu), + tau=tt.constant(tau), + shape=3, + testval=floatX_array([0.1, 1.0, 0.8]), + ) H = tau C = np.linalg.inv(H) return model.test_point, model, (mu, C) @@ -97,9 +99,9 @@ def mv_simple(): def mv_simple_discrete(): d = 2 n = 5 - p = floatX_array([.15, .85]) + p = floatX_array([0.15, 0.85]) with pm.Model() as model: - pm.Multinomial('x', n, tt.constant(p), shape=d, testval=np.array([1, 4])) + pm.Multinomial("x", n, tt.constant(p), shape=d, testval=np.array([1, 4])) mu = n * p # covariance matrix C = np.zeros((d, d)) @@ -132,30 +134,29 @@ def mv_prior_simple(): std_post = (K - np.dot(v.T, v)).diagonal() ** 0.5 with pm.Model() as model: - x = pm.Flat('x', shape=n) - x_obs = pm.MvNormal('x_obs', observed=obs, mu=x, - cov=noise * np.eye(n), shape=n) + x = pm.Flat("x", shape=n) + x_obs = pm.MvNormal("x_obs", observed=obs, mu=x, cov=noise * np.eye(n), shape=n) return model.test_point, model, (K, L, mu_post, std_post, noise) def non_normal(n=2): with pm.Model() as model: - pm.Beta('x', 3, 3, shape=n, transform=None) - return model.test_point, model, (np.tile([.5], n), None) + pm.Beta("x", 3, 3, shape=n, transform=None) + return model.test_point, model, (np.tile([0.5], n), None) def exponential_beta(n=2): with pm.Model() as model: - pm.Beta('x', 3, 1, shape=n, transform=None) - pm.Exponential('y', 1, shape=n, transform=None) + pm.Beta("x", 3, 1, shape=n, transform=None) + pm.Exponential("y", 1, shape=n, transform=None) return model.test_point, model, None def beta_bernoulli(n=2): with pm.Model() as model: - pm.Beta('x', 3, 1, shape=n, transform=None) - pm.Bernoulli('y', 0.5) + pm.Beta("x", 3, 1, shape=n, transform=None) + pm.Bernoulli("y", 0.5) return model.test_point, model, None diff --git a/pymc3/tests/sampler_fixtures.py b/pymc3/tests/sampler_fixtures.py index 78f7ee8c52..484de588a5 100644 --- a/pymc3/tests/sampler_fixtures.py +++ b/pymc3/tests/sampler_fixtures.py @@ -29,12 +29,12 @@ def test_kstest(self): for varname, cdf in self.cdfs.items(): samples = self.samples[varname] if samples.ndim == 1: - t, p = stats.kstest(samples[::self.ks_thin], cdf=cdf) + t, p = stats.kstest(samples[:: self.ks_thin], cdf=cdf) assert self.alpha < p elif samples.ndim == 2: pvals = [] for samples_, cdf_ in zip(samples.T, cdf): - t, p = stats.kstest(samples_[::self.ks_thin], cdf=cdf_) + t, p = stats.kstest(samples_[:: self.ks_thin], cdf=cdf_) pvals.append(p) t, p = stats.combine_pvalues(pvals) assert self.alpha < p @@ -42,11 +42,10 @@ def test_kstest(self): raise NotImplementedError() - class UniformFixture(KnownMean, KnownVariance, KnownCDF): - means = {'a': 0} - variances = {'a': 1.0 / 3} - cdfs = {'a': stats.uniform(-1, 2).cdf} + means = {"a": 0} + variances = {"a": 1.0 / 3} + cdfs = {"a": stats.uniform(-1, 2).cdf} @classmethod def make_model(cls): @@ -57,9 +56,9 @@ def make_model(cls): class NormalFixture(KnownMean, KnownVariance, KnownCDF): - means = {'a': 2 * np.ones(10)} - variances = {'a': 3 * np.ones(10)} - cdfs = {'a': [stats.norm(2, np.sqrt(3)).cdf for _ in range(10)]} + means = {"a": 2 * np.ones(10)} + variances = {"a": 3 * np.ones(10)} + cdfs = {"a": [stats.norm(2, np.sqrt(3)).cdf for _ in range(10)]} @classmethod def make_model(cls): @@ -69,20 +68,19 @@ def make_model(cls): class BetaBinomialFixture(KnownCDF): - cdfs = {'p': [stats.beta(a, b).cdf - for a, b in zip([1.5, 2.5, 10], [3.5, 10.5, 1])]} + cdfs = {"p": [stats.beta(a, b).cdf for a, b in zip([1.5, 2.5, 10], [3.5, 10.5, 1])]} @classmethod def make_model(cls): with pm.Model() as model: - p = pm.Beta("p", [0.5, 0.5, 1.], [0.5, 0.5, 1.], shape=3) + p = pm.Beta("p", [0.5, 0.5, 1.0], [0.5, 0.5, 1.0], shape=3) pm.Binomial("y", p=p, n=[4, 12, 9], observed=[1, 2, 9]) return model class StudentTFixture(KnownMean, KnownCDF): - means = {'a': 0} - cdfs = {'a': stats.t(df=4).cdf} + means = {"a": 0} + cdfs = {"a": stats.t(df=4).cdf} ks_thin = 10 @classmethod @@ -94,14 +92,12 @@ def make_model(cls): class LKJCholeskyCovFixture(KnownCDF): cdfs = { - 'log_stds': [stats.norm(loc=x, scale=x / 10.).cdf - for x in [1, 2, 3, 4, 5]], + "log_stds": [stats.norm(loc=x, scale=x / 10.0).cdf for x in [1, 2, 3, 4, 5]], # The entries of the correlation matrix should follow # beta(eta - 1 + d/2, eta - 1 + d/2) on (-1, 1). # See https://arxiv.org/abs/1309.7268 - 'corr_entries_unit': [ - stats.beta(3 - 1 + 2.5, 3 - 1 + 2.5).cdf - for _ in range(10) + "corr_entries_unit": [ + stats.beta(3 - 1 + 2.5, 3 - 1 + 2.5).cdf for _ in range(10) ], } @@ -109,15 +105,15 @@ class LKJCholeskyCovFixture(KnownCDF): def make_model(cls): with pm.Model() as model: sd_mu = np.array([1, 2, 3, 4, 5]) - sd_dist = pm.Lognormal.dist(mu=sd_mu, sd=sd_mu / 10., shape=5) - chol_packed = pm.LKJCholeskyCov('chol_packed', eta=3, n=5, sd_dist=sd_dist) + sd_dist = pm.Lognormal.dist(mu=sd_mu, sd=sd_mu / 10.0, shape=5) + chol_packed = pm.LKJCholeskyCov("chol_packed", eta=3, n=5, sd_dist=sd_dist) chol = pm.expand_packed_triangular(5, chol_packed, lower=True) cov = tt.dot(chol, chol.T) stds = tt.sqrt(tt.diag(cov)) - pm.Deterministic('log_stds', tt.log(stds)) + pm.Deterministic("log_stds", tt.log(stds)) corr = cov / stds[None, :] / stds[:, None] corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2 - pm.Deterministic('corr_entries_unit', corr_entries_unit) + pm.Deterministic("corr_entries_unit", corr_entries_unit) return model @@ -128,19 +124,21 @@ def setup_class(cls): cls.model = cls.make_model() with cls.model: cls.step = cls.make_step() - cls.trace = pm.sample(cls.n_samples, tune=cls.tune, step=cls.step, cores=cls.chains) + cls.trace = pm.sample( + cls.n_samples, tune=cls.tune, step=cls.step, cores=cls.chains + ) cls.samples = {} for var in cls.model.unobserved_RVs: cls.samples[str(var)] = cls.trace.get_values(var, burn=cls.burn) def test_neff(self): - if hasattr(self, 'min_n_eff'): - n_eff = pm.effective_n(self.trace[self.burn:]) + if hasattr(self, "min_n_eff"): + n_eff = pm.effective_n(self.trace[self.burn :]) for var in n_eff: npt.assert_array_less(self.min_n_eff, n_eff[var]) def test_Rhat(self): - rhat = pm.gelman_rubin(self.trace[self.burn:]) + rhat = pm.gelman_rubin(self.trace[self.burn :]) for var in rhat: npt.assert_allclose(rhat[var], 1, rtol=0.01) @@ -149,16 +147,16 @@ class NutsFixture(BaseSampler): @classmethod def make_step(cls): args = {} - if hasattr(cls, 'step_args'): + if hasattr(cls, "step_args"): args.update(cls.step_args) - if 'scaling' not in args: + if "scaling" not in args: _, step = pm.sampling.init_nuts(n_init=10000, **args) else: step = pm.NUTS(**args) return step def test_target_accept(self): - accept = self.trace[self.burn:]['mean_tree_accept'] + accept = self.trace[self.burn :]["mean_tree_accept"] npt.assert_allclose(accept.mean(), self.step.target_accept, 1) @@ -166,7 +164,7 @@ class MetropolisFixture(BaseSampler): @classmethod def make_step(cls): args = {} - if hasattr(cls, 'step_args'): + if hasattr(cls, "step_args"): args.update(cls.step_args) return pm.Metropolis(**args) @@ -175,6 +173,6 @@ class SliceFixture(BaseSampler): @classmethod def make_step(cls): args = {} - if hasattr(cls, 'step_args'): + if hasattr(cls, "step_args"): args.update(cls.step_args) return pm.Slice(**args) diff --git a/pymc3/tests/test_diagnostics.py b/pymc3/tests/test_diagnostics.py index 35c8854adc..6caf258734 100644 --- a/pymc3/tests/test_diagnostics.py +++ b/pymc3/tests/test_diagnostics.py @@ -13,7 +13,9 @@ import theano -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestGelmanRubin(SeededTest): good_ratio = 1.1 @@ -23,24 +25,29 @@ def get_ptrace(self, n_samples): # Run sampler step1 = Slice([model.early_mean_log__, model.late_mean_log__]) step2 = Metropolis([model.switchpoint]) - start = {'early_mean': 7., 'late_mean': 5., 'switchpoint': 10} - ptrace = sample(n_samples, tune=0, step=[step1, step2], start=start, cores=2, - progressbar=False, random_seed=[20090425, 19700903]) + start = {"early_mean": 7.0, "late_mean": 5.0, "switchpoint": 10} + ptrace = sample( + n_samples, + tune=0, + step=[step1, step2], + start=start, + cores=2, + progressbar=False, + random_seed=[20090425, 19700903], + ) return ptrace def test_good(self): """Confirm Gelman-Rubin statistic is close to 1 for a reasonable number of samples.""" n_samples = 1000 rhat = gelman_rubin(self.get_ptrace(n_samples)) - assert all(1 / self.good_ratio < r < - self.good_ratio for r in rhat.values()) + assert all(1 / self.good_ratio < r < self.good_ratio for r in rhat.values()) def test_bad(self): """Confirm Gelman-Rubin statistic is far from 1 for a small number of samples.""" n_samples = 5 rhat = gelman_rubin(self.get_ptrace(n_samples)) - assert not all(1 / self.good_ratio < r < - self.good_ratio for r in rhat.values()) + assert not all(1 / self.good_ratio < r < self.good_ratio for r in rhat.values()) def test_right_shape_python_float(self, shape=None, test_shape=None): """Check Gelman-Rubin statistic shape is correct w/ python float""" @@ -49,17 +56,18 @@ def test_right_shape_python_float(self, shape=None, test_shape=None): with Model(): if shape is not None: - Normal('x', 0, 1., shape=shape) + Normal("x", 0, 1.0, shape=shape) else: - Normal('x', 0, 1.) + Normal("x", 0, 1.0) # start sampling at the MAP start = find_MAP() step = NUTS(scaling=start, step_scale=0.1) - ptrace = sample(n_samples, tune=0, step=step, start=start, - chains=chains, random_seed=42) + ptrace = sample( + n_samples, tune=0, step=step, start=start, chains=chains, random_seed=42 + ) - rhat = gelman_rubin(ptrace)['x'] + rhat = gelman_rubin(ptrace)["x"] if test_shape is None: test_shape = shape @@ -87,19 +95,26 @@ def test_right_shape_scalar_one(self): self.test_right_shape_python_float(shape=1, test_shape=(1,)) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestDiagnostics(SeededTest): - def get_switchpoint(self, n_samples, chains=1): model = build_disaster_model() with model: # Run sampler step1 = Slice([model.early_mean_log__, model.late_mean_log__]) step2 = Metropolis([model.switchpoint]) - trace = sample(0, tune=n_samples, step=[step1, step2], - progressbar=False, random_seed=1, - discard_tuned_samples=False, chains=chains) - return trace['switchpoint'] + trace = sample( + 0, + tune=n_samples, + step=[step1, step2], + progressbar=False, + random_seed=1, + discard_tuned_samples=False, + chains=chains, + ) + return trace["switchpoint"] def test_geweke_negative(self): """Confirm Geweke diagnostic is larger than 1 for a small number of samples.""" @@ -110,8 +125,7 @@ def test_geweke_negative(self): last = 0.7 # returns (intervalsx2) matrix, with first row start indexes, second # z-scores - z_switch = geweke(switchpoint, first=first, - last=last, intervals=n_intervals) + z_switch = geweke(switchpoint, first=first, last=last, intervals=n_intervals) # These z-scores should be larger, since there are not many samples. assert max(abs(z_switch[:, 1])) > 1 @@ -135,8 +149,7 @@ def test_geweke_positive(self): last = 0.7 # returns (intervalsx2) matrix, with first row start indexes, second # z-scores - z_switch = geweke(switchpoint, first=first, - last=last, intervals=n_intervals) + z_switch = geweke(switchpoint, first=first, last=last, intervals=n_intervals) start = z_switch[:, 0] z_scores = z_switch[:, 1] @@ -155,38 +168,49 @@ def test_effective_n(self): n_samples = 100 with Model(): - Normal('x', 0, 1., shape=5) + Normal("x", 0, 1.0, shape=5) # start sampling at the MAP start = find_MAP() step = NUTS(scaling=start) - ptrace = sample(0, tune=n_samples, step=step, start=start, - cores=n_jobs, discard_tuned_samples=False, - random_seed=42) - - n_effective = effective_n(ptrace)['x'] + ptrace = sample( + 0, + tune=n_samples, + step=step, + start=start, + cores=n_jobs, + discard_tuned_samples=False, + random_seed=42, + ) + + n_effective = effective_n(ptrace)["x"] assert_allclose(n_effective, n_jobs * n_samples, 2) - def test_effective_n_right_shape_python_float(self, - shape=None, test_shape=None): + def test_effective_n_right_shape_python_float(self, shape=None, test_shape=None): """Check effective sample size shape is correct w/ python float""" n_jobs = 3 n_samples = 10 with Model(): if shape is not None: - Normal('x', 0, 1., shape=shape) + Normal("x", 0, 1.0, shape=shape) else: - Normal('x', 0, 1.) + Normal("x", 0, 1.0) # start sampling at the MAP start = find_MAP() step = NUTS(scaling=start) - ptrace = sample(0, tune=n_samples, step=step, start=start, - cores=n_jobs, discard_tuned_samples=False, - random_seed=42) - - n_effective = effective_n(ptrace)['x'] + ptrace = sample( + 0, + tune=n_samples, + step=step, + start=start, + cores=n_jobs, + discard_tuned_samples=False, + random_seed=42, + ) + + n_effective = effective_n(ptrace)["x"] if test_shape is None: test_shape = shape @@ -211,5 +235,4 @@ def test_effective_n_right_shape_scalar_array(self): def test_effective_n_right_shape_scalar_one(self): """Check effective sample size shape is correct w/ scalar as shape=1""" - self.test_effective_n_right_shape_python_float(shape=1, - test_shape=(1,)) + self.test_effective_n_right_shape_python_float(shape=1, test_shape=(1,)) diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py index 57e6fdd19a..df07e69e74 100644 --- a/pymc3/tests/test_dist_math.py +++ b/pymc3/tests/test_dist_math.py @@ -10,7 +10,12 @@ from ..theanof import floatX from ..distributions import Discrete from ..distributions.dist_math import ( - bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper) + bound, + factln, + alltrue_scalar, + MvNormalLogp, + SplineWrapper, +) def test_bound(): @@ -36,36 +41,29 @@ def test_bound(): assert not np.all(bound(logp, cond).eval() == 1) assert np.prod(bound(logp, cond).eval()) == -np.inf + def test_alltrue_scalar(): assert alltrue_scalar([]).eval() assert alltrue_scalar([True]).eval() assert alltrue_scalar([tt.ones(10)]).eval() - assert alltrue_scalar([tt.ones(10), - 5 * tt.ones(101)]).eval() - assert alltrue_scalar([np.ones(10), - 5 * tt.ones(101)]).eval() - assert alltrue_scalar([np.ones(10), - True, - 5 * tt.ones(101)]).eval() - assert alltrue_scalar([np.array([1, 2, 3]), - True, - 5 * tt.ones(101)]).eval() + assert alltrue_scalar([tt.ones(10), 5 * tt.ones(101)]).eval() + assert alltrue_scalar([np.ones(10), 5 * tt.ones(101)]).eval() + assert alltrue_scalar([np.ones(10), True, 5 * tt.ones(101)]).eval() + assert alltrue_scalar([np.array([1, 2, 3]), True, 5 * tt.ones(101)]).eval() assert not alltrue_scalar([False]).eval() assert not alltrue_scalar([tt.zeros(10)]).eval() - assert not alltrue_scalar([True, - False]).eval() - assert not alltrue_scalar([np.array([0, -1]), - tt.ones(60)]).eval() - assert not alltrue_scalar([np.ones(10), - False, - 5 * tt.ones(101)]).eval() + assert not alltrue_scalar([True, False]).eval() + assert not alltrue_scalar([np.array([0, -1]), tt.ones(60)]).eval() + assert not alltrue_scalar([np.ones(10), False, 5 * tt.ones(101)]).eval() + def test_alltrue_shape(): vals = [True, tt.ones(10), tt.zeros(5)] assert alltrue_scalar(vals).eval().shape == () + class MultinomialA(Discrete): def __init__(self, n, p, *args, **kwargs): super(MultinomialA, self).__init__(*args, **kwargs) @@ -77,11 +75,13 @@ def logp(self, value): n = self.n p = self.p - return bound(factln(n) - factln(value).sum() + (value * tt.log(p)).sum(), - value >= 0, - 0 <= p, p <= 1, - tt.isclose(p.sum(), 1), - broadcast_conditions=False + return bound( + factln(n) - factln(value).sum() + (value * tt.log(p)).sum(), + value >= 0, + 0 <= p, + p <= 1, + tt.isclose(p.sum(), 1), + broadcast_conditions=False, ) @@ -96,11 +96,13 @@ def logp(self, value): n = self.n p = self.p - return bound(factln(n) - factln(value).sum() + (value * tt.log(p)).sum(), - tt.all(value >= 0), - tt.all(0 <= p), tt.all(p <= 1), - tt.isclose(p.sum(), 1), - broadcast_conditions=False + return bound( + factln(n) - factln(value).sum() + (value * tt.log(p)).sum(), + tt.all(value >= 0), + tt.all(0 <= p), + tt.all(p <= 1), + tt.isclose(p.sum(), 1), + broadcast_conditions=False, ) @@ -110,27 +112,28 @@ def test_multinomial_bound(): n = x.sum() with pm.Model() as modelA: - p_a = pm.Dirichlet('p', floatX(np.ones(2))) - MultinomialA('x', n, p_a, observed=x) + p_a = pm.Dirichlet("p", floatX(np.ones(2))) + MultinomialA("x", n, p_a, observed=x) with pm.Model() as modelB: - p_b = pm.Dirichlet('p', floatX(np.ones(2))) - MultinomialB('x', n, p_b, observed=x) + p_b = pm.Dirichlet("p", floatX(np.ones(2))) + MultinomialB("x", n, p_b, observed=x) - assert np.isclose(modelA.logp({'p_stickbreaking__': [0]}), - modelB.logp({'p_stickbreaking__': [0]})) + assert np.isclose( + modelA.logp({"p_stickbreaking__": [0]}), modelB.logp({"p_stickbreaking__": [0]}) + ) -class TestMvNormalLogp(): +class TestMvNormalLogp: def test_logp(self): np.random.seed(42) chol_val = floatX(np.array([[1, 0.9], [0, 2]])) cov_val = floatX(np.dot(chol_val, chol_val.T)) - cov = tt.matrix('cov') + cov = tt.matrix("cov") cov.tag.test_value = cov_val delta_val = floatX(np.random.randn(5, 2)) - delta = tt.matrix('delta') + delta = tt.matrix("delta") delta.tag.test_value = delta_val expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val) expect = expect.logpdf(delta_val).sum() @@ -144,14 +147,16 @@ def test_grad(self): np.random.seed(42) def func(chol_vec, delta): - chol = tt.stack([ - tt.stack([tt.exp(0.1 * chol_vec[0]), 0]), - tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]), - ]) + chol = tt.stack( + [ + tt.stack([tt.exp(0.1 * chol_vec[0]), 0]), + tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]), + ] + ) cov = tt.dot(chol, chol.T) return MvNormalLogp()(cov, delta) - chol_vec_val = floatX(np.array([0.5, 1., -0.1])) + chol_vec_val = floatX(np.array([0.5, 1.0, -0.1])) delta_val = floatX(np.random.randn(1, 2)) utt.verify_grad(func, [chol_vec_val, delta_val]) @@ -162,14 +167,16 @@ def func(chol_vec, delta): @pytest.mark.skip(reason="Fix in theano not released yet: Theano#5908") @theano.configparser.change_flags(compute_test_value="ignore") def test_hessian(self): - chol_vec = tt.vector('chol_vec') + chol_vec = tt.vector("chol_vec") chol_vec.tag.test_value = np.array([0.1, 2, 3]) - chol = tt.stack([ - tt.stack([tt.exp(0.1 * chol_vec[0]), 0]), - tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]), - ]) + chol = tt.stack( + [ + tt.stack([tt.exp(0.1 * chol_vec[0]), 0]), + tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]), + ] + ) cov = tt.dot(chol, chol.T) - delta = tt.matrix('delta') + delta = tt.matrix("delta") delta.tag.test_value = np.ones((5, 2)) logp = MvNormalLogp()(cov, delta) g_cov, g_delta = tt.grad(logp, [cov, delta]) @@ -189,7 +196,7 @@ def test_hessian(self): x = np.linspace(0, 1, 100) y = x * x spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1)) - x_var = tt.dscalar('x') + x_var = tt.dscalar("x") g_x, = tt.grad(spline(x_var), [x_var]) with pytest.raises(NotImplementedError): tt.grad(g_x, [x_var]) diff --git a/pymc3/tests/test_distribution_defaults.py b/pymc3/tests/test_distribution_defaults.py index 331872e04c..7b536d66e1 100644 --- a/pymc3/tests/test_distribution_defaults.py +++ b/pymc3/tests/test_distribution_defaults.py @@ -8,7 +8,6 @@ class DistTest(Continuous): - def __init__(self, a, b, *args, **kwargs): super(DistTest, self).__init__(*args, **kwargs) self.a = a @@ -20,52 +19,53 @@ def logp(self, v): def test_default_nan_fail(): with Model(), pytest.raises(AttributeError): - DistTest('x', np.nan, 2, defaults=['a']) + DistTest("x", np.nan, 2, defaults=["a"]) def test_default_empty_fail(): with Model(), pytest.raises(AttributeError): - DistTest('x', 1, 2, defaults=[]) + DistTest("x", 1, 2, defaults=[]) def test_default_testval(): with Model(): - x = DistTest('x', 1, 2, testval=5, defaults=[]) + x = DistTest("x", 1, 2, testval=5, defaults=[]) assert x.tag.test_value == 5 def test_default_testval_nan(): with Model(): - x = DistTest('x', 1, 2, testval=np.nan, defaults=['a']) + x = DistTest("x", 1, 2, testval=np.nan, defaults=["a"]) np.testing.assert_almost_equal(x.tag.test_value, np.nan) def test_default_a(): with Model(): - x = DistTest('x', 1, 2, defaults=['a']) + x = DistTest("x", 1, 2, defaults=["a"]) assert x.tag.test_value == 1 def test_default_b(): with Model(): - x = DistTest('x', np.nan, 2, defaults=['a', 'b']) + x = DistTest("x", np.nan, 2, defaults=["a", "b"]) assert x.tag.test_value == 2 def test_default_c(): with Model(): - y = DistTest('y', 7, 8, testval=94) - x = DistTest('x', y, 2, defaults=['a', 'b']) + y = DistTest("y", 7, 8, testval=94) + x = DistTest("x", y, 2, defaults=["a", "b"]) assert x.tag.test_value == 94 def test_default_discrete_uniform(): with Model(): - x = DiscreteUniform('x', lower=1, upper=2) + x = DiscreteUniform("x", lower=1, upper=2) assert x.init_value == 1 + def test_discrete_uniform_negative(): model = Model() with model: - x = DiscreteUniform('x', lower=-10, upper=0) - assert model.test_point['x'] == -5 + x = DiscreteUniform("x", lower=-10, upper=0) + assert model.test_point["x"] == -5 diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 783cd8e8e0..5891b34be5 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -8,17 +8,60 @@ from ..model import Model, Point, Potential, Deterministic from ..blocking import DictToVarBijection, DictToArrayBijection, ArrayOrdering from ..distributions import ( - DensityDist, Categorical, Multinomial, VonMises, Dirichlet, - MvStudentT, MvNormal, MatrixNormal, ZeroInflatedPoisson, - ZeroInflatedNegativeBinomial, Constant, Poisson, Bernoulli, Beta, - BetaBinomial, HalfStudentT, StudentT, Weibull, Pareto, - InverseGamma, Gamma, Cauchy, HalfCauchy, Lognormal, Laplace, - NegativeBinomial, Geometric, Exponential, ExGaussian, Normal, TruncatedNormal, - Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform, - Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull, - Gumbel, Logistic, OrderedLogistic, LogitNormal, Interpolated, - ZeroInflatedBinomial, HalfFlat, AR1, KroneckerNormal, Rice, - Kumaraswamy + DensityDist, + Categorical, + Multinomial, + VonMises, + Dirichlet, + MvStudentT, + MvNormal, + MatrixNormal, + ZeroInflatedPoisson, + ZeroInflatedNegativeBinomial, + Constant, + Poisson, + Bernoulli, + Beta, + BetaBinomial, + HalfStudentT, + StudentT, + Weibull, + Pareto, + InverseGamma, + Gamma, + Cauchy, + HalfCauchy, + Lognormal, + Laplace, + NegativeBinomial, + Geometric, + Exponential, + ExGaussian, + Normal, + TruncatedNormal, + Flat, + LKJCorr, + Wald, + ChiSquared, + HalfNormal, + DiscreteUniform, + Bound, + Uniform, + Triangular, + Binomial, + SkewNormal, + DiscreteWeibull, + Gumbel, + Logistic, + OrderedLogistic, + LogitNormal, + Interpolated, + ZeroInflatedBinomial, + HalfFlat, + AR1, + KroneckerNormal, + Rice, + Kumaraswamy, ) from ..distributions import continuous @@ -37,6 +80,7 @@ import theano.tensor as tt from ..math import kronecker + def get_lkj_cases(): """ Log probabilities calculated using the formulas in: @@ -48,7 +92,7 @@ def get_lkj_cases(): (tri, 3, 3, -7.7963493376312742), (tri, 0, 3, -np.inf), (np.array([1.1, 0.0, -0.7]), 1, 3, -np.inf), - (np.array([0.7, 0.0, -1.1]), 1, 3, -np.inf) + (np.array([0.7, 0.0, -1.1]), 1, 3, -np.inf), ] @@ -58,7 +102,7 @@ def get_lkj_cases(): class Domain(object): def __init__(self, vals, dtype=None, edges=None, shape=None): avals = array(vals, dtype=dtype) - if dtype is None and not str(avals.dtype).startswith('int'): + if dtype is None and not str(avals.dtype).startswith("int"): avals = avals.astype(theano.config.floatX) vals = [array(v, dtype=avals.dtype) for v in vals] @@ -79,7 +123,8 @@ def __add__(self, other): [v + other for v in self.vals], self.dtype, (self.lower + other, self.upper + other), - self.shape) + self.shape, + ) def __mul__(self, other): try: @@ -87,20 +132,20 @@ def __mul__(self, other): [v * other for v in self.vals], self.dtype, (self.lower * other, self.upper * other), - self.shape) + self.shape, + ) except TypeError: return Domain( [v * other for v in self.vals], self.dtype, (self.lower, self.upper), - self.shape) + self.shape, + ) def __neg__(self): return Domain( - [-v for v in self.vals], - self.dtype, - (-self.lower, -self.upper), - self.shape) + [-v for v in self.vals], self.dtype, (-self.lower, -self.upper), self.shape + ) def product(domains, n_samples=-1): @@ -118,33 +163,35 @@ def product(domains, n_samples=-1): names, domains = zip(*domains.items()) except ValueError: # domains.items() is empty return [] - all_vals = [zip(names, val) for val in itertools.product(*[d.vals for d in domains])] + all_vals = [ + zip(names, val) for val in itertools.product(*[d.vals for d in domains]) + ] if n_samples > 0 and len(all_vals) > n_samples: - return (all_vals[j] for j in nr.choice(len(all_vals), n_samples, replace=False)) + return (all_vals[j] for j in nr.choice(len(all_vals), n_samples, replace=False)) return all_vals -R = Domain([-inf, -2.1, -1, -.01, .0, .01, 1, 2.1, inf]) -Rplus = Domain([0, .01, .1, .9, .99, 1, 1.5, 2, 100, inf]) -Rplusbig = Domain([0, .5, .9, .99, 1, 1.5, 2, 20, inf]) -Rminusbig = Domain([-inf, -2, -1.5, -1, -.99, -.9, -.5, -0.01, 0]) -Unit = Domain([0, .001, .1, .5, .75, .99, 1]) +R = Domain([-inf, -2.1, -1, -0.01, 0.0, 0.01, 1, 2.1, inf]) +Rplus = Domain([0, 0.01, 0.1, 0.9, 0.99, 1, 1.5, 2, 100, inf]) +Rplusbig = Domain([0, 0.5, 0.9, 0.99, 1, 1.5, 2, 20, inf]) +Rminusbig = Domain([-inf, -2, -1.5, -1, -0.99, -0.9, -0.5, -0.01, 0]) +Unit = Domain([0, 0.001, 0.1, 0.5, 0.75, 0.99, 1]) -Circ = Domain([-np.pi, -2.1, -1, -.01, .0, .01, 1, 2.1, np.pi]) +Circ = Domain([-np.pi, -2.1, -1, -0.01, 0.0, 0.01, 1, 2.1, np.pi]) -Runif = Domain([-1, -.4, 0, .4, 1]) -Rdunif = Domain([-10, 0, 10.]) -Rplusunif = Domain([0, .5, inf]) -Rplusdunif = Domain([2, 10, 100], 'int64') +Runif = Domain([-1, -0.4, 0, 0.4, 1]) +Rdunif = Domain([-10, 0, 10.0]) +Rplusunif = Domain([0, 0.5, inf]) +Rplusdunif = Domain([2, 10, 100], "int64") -I = Domain([-1000, -3, -2, -1, 0, 1, 2, 3, 1000], 'int64') +I = Domain([-1000, -3, -2, -1, 0, 1, 2, 3, 1000], "int64") -NatSmall = Domain([0, 3, 4, 5, 1000], 'int64') -Nat = Domain([0, 1, 2, 3, 2000], 'int64') -NatBig = Domain([0, 1, 2, 3, 5000, 50000], 'int64') -PosNat = Domain([1, 2, 3, 2000], 'int64') +NatSmall = Domain([0, 3, 4, 5, 1000], "int64") +Nat = Domain([0, 1, 2, 3, 2000], "int64") +NatBig = Domain([0, 1, 2, 3, 5000, 50000], "int64") +PosNat = Domain([1, 2, 3, 2000], "int64") -Bool = Domain([0, 0, 1, 1], 'int64') +Bool = Domain([0, 0, 1, 1], "int64") def build_model(distfam, valuedomain, vardomains, extra_args=None): @@ -153,10 +200,9 @@ def build_model(distfam, valuedomain, vardomains, extra_args=None): with Model() as m: vals = {} for v, dom in vardomains.items(): - vals[v] = Flat(v, dtype=dom.dtype, shape=dom.shape, - testval=dom.vals[0]) + vals[v] = Flat(v, dtype=dom.dtype, shape=dom.shape, testval=dom.vals[0]) vals.update(extra_args) - distfam('value', shape=valuedomain.shape, transform=None, **vals) + distfam("value", shape=valuedomain.shape, transform=None, **vals) return m @@ -167,21 +213,31 @@ def integrate_nd(f, domain, shape, dtype): else: return sum(f(j) for j in range(domain.lower, domain.upper + 1)) elif shape == (2,): + def f2(a, b): return f([a, b]) - return integrate.dblquad(f2, domain.lower[0], domain.upper[0], - lambda _: domain.lower[1], - lambda _: domain.upper[1])[0] + return integrate.dblquad( + f2, + domain.lower[0], + domain.upper[0], + lambda _: domain.lower[1], + lambda _: domain.upper[1], + )[0] elif shape == (3,): + def f3(a, b, c): return f([a, b, c]) - return integrate.tplquad(f3, domain.lower[0], domain.upper[0], - lambda _: domain.lower[1], - lambda _: domain.upper[1], - lambda _, __: domain.lower[2], - lambda _, __: domain.upper[2])[0] + return integrate.tplquad( + f3, + domain.lower[0], + domain.upper[0], + lambda _: domain.lower[1], + lambda _: domain.upper[1], + lambda _, __: domain.lower[2], + lambda _, __: domain.upper[2], + )[0] else: raise ValueError("Dont know how to integrate shape: " + str(shape)) @@ -197,7 +253,7 @@ def multinomial_logpdf(value, n, p): def beta_mu_sd(value, mu, sd): - kappa = mu * (1 - mu) / sd**2 - 1 + kappa = mu * (1 - mu) / sd ** 2 - 1 if kappa > 0: return sp.beta.logpdf(value, mu * kappa, (1 - mu) * kappa) else: @@ -271,14 +327,15 @@ def matrix_normal_logpdf_cov(value, mu, rowcov, colcov): def matrix_normal_logpdf_chol(value, mu, rowchol, colchol): - return matrix_normal_logpdf_cov(value, mu, np.dot(rowchol, rowchol.T), - np.dot(colchol, colchol.T)) + return matrix_normal_logpdf_cov( + value, mu, np.dot(rowchol, rowchol.T), np.dot(colchol, colchol.T) + ) def kron_normal_logpdf_cov(value, mu, covs, sigma): cov = kronecker(*covs).eval() if sigma is not None: - cov += sigma**2 * np.eye(*cov.shape) + cov += sigma ** 2 * np.eye(*cov.shape) return scipy.stats.multivariate_normal.logpdf(value, mu, cov).sum() @@ -311,8 +368,11 @@ def logpow(v, p): def discrete_weibull_logpmf(value, q, beta): - return floatX(np.log(np.power(q, np.power(value, beta)) - - np.power(q, np.power(value + 1, beta)))) + return floatX( + np.log( + np.power(q, np.power(value, beta)) - np.power(q, np.power(value + 1, beta)) + ) + ) def dirichlet_logpdf(value, a): @@ -325,6 +385,7 @@ def categorical_logpdf(value, p): else: return -inf + def mvt_logpdf(value, nu, Sigma, mu=0): d = len(Sigma) dist = np.atleast_2d(value) - mu @@ -333,22 +394,28 @@ def mvt_logpdf(value, nu, Sigma, mu=0): logdet = np.log(np.diag(chol)).sum() lgamma = scipy.special.gammaln - norm = lgamma((nu + d) / 2.) - 0.5 * d * np.log(nu * np.pi) - lgamma(nu / 2.) - logp = norm - logdet - (nu + d) / 2. * np.log1p((trafo * trafo).sum(-1) / nu) + norm = lgamma((nu + d) / 2.0) - 0.5 * d * np.log(nu * np.pi) - lgamma(nu / 2.0) + logp = norm - logdet - (nu + d) / 2.0 * np.log1p((trafo * trafo).sum(-1) / nu) return logp.sum() + def AR1_logpdf(value, k, tau_e): - return (sp.norm(loc=0,scale=1/np.sqrt(tau_e)).logpdf(value[0]) + - sp.norm(loc=k*value[:-1],scale=1/np.sqrt(tau_e)).logpdf(value[1:]).sum()) + return ( + sp.norm(loc=0, scale=1 / np.sqrt(tau_e)).logpdf(value[0]) + + sp.norm(loc=k * value[:-1], scale=1 / np.sqrt(tau_e)).logpdf(value[1:]).sum() + ) + def invlogit(x, eps=sys.float_info.epsilon): - return (1. - 2. * eps) / (1. + np.exp(-x)) + eps + return (1.0 - 2.0 * eps) / (1.0 + np.exp(-x)) + eps + def orderedlogistic_logpdf(value, eta, cutpoints): c = np.concatenate(([-np.inf], cutpoints, [np.inf])) p = invlogit(eta - c[value]) - invlogit(eta - c[value + 1]) return np.log(p) + class Simplex(object): def __init__(self, n): self.vals = list(simplex_values(n)) @@ -359,7 +426,9 @@ def __init__(self, n): class MultiSimplex(object): def __init__(self, n_dependent, n_independent): self.vals = [] - for simplex_value in itertools.product(simplex_values(n_dependent), repeat=n_independent): + for simplex_value in itertools.product( + simplex_values(n_dependent), repeat=n_independent + ): self.vals.append(np.vstack(simplex_value)) self.shape = (n_independent, n_dependent) self.dtype = Unit.dtype @@ -375,18 +444,21 @@ def PdMatrix(n): else: raise ValueError("n out of bounds") -PdMatrix1 = Domain([np.eye(1), [[.5]]], edges=(None, None)) -PdMatrix2 = Domain([np.eye(2), [[.5, .05], [.05, 4.5]]], edges=(None, None)) +PdMatrix1 = Domain([np.eye(1), [[0.5]]], edges=(None, None)) + +PdMatrix2 = Domain([np.eye(2), [[0.5, 0.05], [0.05, 4.5]]], edges=(None, None)) PdMatrix3 = Domain( - [np.eye(3), [[.5, .1, 0], [.1, 1, 0], [0, 0, 2.5]]], edges=(None, None)) + [np.eye(3), [[0.5, 0.1, 0], [0.1, 1, 0], [0, 0, 2.5]]], edges=(None, None) +) PdMatrixChol1 = Domain([np.eye(1), [[0.001]]], edges=(None, None)) PdMatrixChol2 = Domain([np.eye(2), [[0.1, 0], [10, 1]]], edges=(None, None)) -PdMatrixChol3 = Domain([np.eye(3), [[0.1, 0, 0], [10, 100, 0], [0, 1, 10]]], - edges=(None, None)) +PdMatrixChol3 = Domain( + [np.eye(3), [[0.1, 0, 0], [10, 100, 0], [0, 1, 10]]], edges=(None, None) +) def PdMatrixChol(n): @@ -402,8 +474,9 @@ def PdMatrixChol(n): PdMatrixCholUpper1 = Domain([np.eye(1), [[0.001]]], edges=(None, None)) PdMatrixCholUpper2 = Domain([np.eye(2), [[0.1, 10], [0, 1]]], edges=(None, None)) -PdMatrixCholUpper3 = Domain([np.eye(3), [[0.1, 10, 0], [0, 100, 1], [0, 0, 10]]], - edges=(None, None)) +PdMatrixCholUpper3 = Domain( + [np.eye(3), [[0.1, 10, 0], [0, 100, 1], [0, 0, 10]]], edges=(None, None) +) def PdMatrixCholUpper(n): @@ -423,42 +496,61 @@ def RandomPdMatrix(n): class TestMatchesScipy(SeededTest): - def pymc3_matches_scipy(self, pymc3_dist, domain, paramdomains, scipy_dist, - decimal=None, extra_args=None, scipy_args=None): + def pymc3_matches_scipy( + self, + pymc3_dist, + domain, + paramdomains, + scipy_dist, + decimal=None, + extra_args=None, + scipy_args=None, + ): if extra_args is None: extra_args = {} if scipy_args is None: scipy_args = {} model = build_model(pymc3_dist, domain, paramdomains, extra_args) - value = model.named_vars['value'] + value = model.named_vars["value"] def logp(args): args.update(scipy_args) return scipy_dist(**args) + self.check_logp(model, value, domain, paramdomains, logp, decimal=decimal) - def check_logp(self, model, value, domain, paramdomains, logp_reference, decimal=None): + def check_logp( + self, model, value, domain, paramdomains, logp_reference, decimal=None + ): domains = paramdomains.copy() - domains['value'] = domain + domains["value"] = domain logp = model.fastlogp for pt in product(domains, n_samples=100): pt = Point(pt, model=model) if decimal is None: decimal = select_by_precision(float64=6, float32=3) - assert_almost_equal(logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt)) + assert_almost_equal( + logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt) + ) - def check_logcdf(self, pymc3_dist, domain, paramdomains, scipy_logcdf, decimal=None): + def check_logcdf( + self, pymc3_dist, domain, paramdomains, scipy_logcdf, decimal=None + ): domains = paramdomains.copy() - domains['value'] = domain + domains["value"] = domain if decimal is None: decimal = select_by_precision(float64=6, float32=3) for pt in product(domains, n_samples=100): params = dict(pt) scipy_cdf = scipy_logcdf(**params) - value = params.pop('value') + value = params.pop("value") dist = pymc3_dist.dist(**params) - assert_almost_equal(dist.logcdf(value).tag.test_value, scipy_cdf, - decimal=decimal, err_msg=str(pt)) + assert_almost_equal( + dist.logcdf(value).tag.test_value, + scipy_cdf, + decimal=decimal, + err_msg=str(pt), + ) def check_int_to_1(self, model, value, domain, paramdomains): pdf = model.fastfn(exp(model.logpt)) @@ -478,9 +570,8 @@ def check_dlogp(self, model, value, domain, paramdomains): return domains = paramdomains.copy() - domains['value'] = domain - bij = DictToArrayBijection( - ArrayOrdering(model.cont_vars), model.test_point) + domains["value"] = domain + bij = DictToArrayBijection(ArrayOrdering(model.cont_vars), model.test_point) dlogp = bij.mapf(model.fastdlogp(model.cont_vars)) logp = bij.mapf(model.fastlogp) @@ -495,7 +586,9 @@ def wrapped_logp(x): pt = Point(pt, model=model) pt = bij.map(pt) decimals = select_by_precision(float64=6, float32=4) - assert_almost_equal(dlogp(pt), ndlogp(pt), decimal=decimals, err_msg=str(pt)) + assert_almost_equal( + dlogp(pt), ndlogp(pt), decimal=decimals, err_msg=str(pt) + ) def checkd(self, distfam, valuedomain, vardomains, checks=None, extra_args=None): if checks is None: @@ -505,328 +598,547 @@ def checkd(self, distfam, valuedomain, vardomains, checks=None, extra_args=None) extra_args = {} m = build_model(distfam, valuedomain, vardomains, extra_args=extra_args) for check in checks: - check(m, m.named_vars['value'], valuedomain, vardomains) + check(m, m.named_vars["value"], valuedomain, vardomains) def test_uniform(self): self.pymc3_matches_scipy( - Uniform, Runif, {'lower': -Rplusunif, 'upper': Rplusunif}, - lambda value, lower, upper: sp.uniform.logpdf(value, lower, upper - lower)) - self.check_logcdf(Uniform, Runif, {'lower': -Rplusunif, 'upper': Rplusunif}, - lambda value, lower, upper: sp.uniform.logcdf(value, lower, upper - lower)) + Uniform, + Runif, + {"lower": -Rplusunif, "upper": Rplusunif}, + lambda value, lower, upper: sp.uniform.logpdf(value, lower, upper - lower), + ) + self.check_logcdf( + Uniform, + Runif, + {"lower": -Rplusunif, "upper": Rplusunif}, + lambda value, lower, upper: sp.uniform.logcdf(value, lower, upper - lower), + ) def test_triangular(self): self.pymc3_matches_scipy( - Triangular, Runif, {'lower': -Rplusunif, 'c': Runif, 'upper': Rplusunif}, - lambda value, c, lower, upper: sp.triang.logpdf(value, c-lower, lower, upper-lower)) - self.check_logcdf(Triangular, Runif, {'lower': -Rplusunif, 'c': Runif, 'upper': Rplusunif}, - lambda value, c, lower, upper: sp.triang.logcdf(value, c-lower, lower, upper-lower)) + Triangular, + Runif, + {"lower": -Rplusunif, "c": Runif, "upper": Rplusunif}, + lambda value, c, lower, upper: sp.triang.logpdf( + value, c - lower, lower, upper - lower + ), + ) + self.check_logcdf( + Triangular, + Runif, + {"lower": -Rplusunif, "c": Runif, "upper": Rplusunif}, + lambda value, c, lower, upper: sp.triang.logcdf( + value, c - lower, lower, upper - lower + ), + ) def test_bound_normal(self): - PositiveNormal = Bound(Normal, lower=0.) - self.pymc3_matches_scipy(PositiveNormal, Rplus, {'mu': Rplus, 'sd': Rplus}, - lambda value, mu, sd: sp.norm.logpdf(value, mu, sd), - decimal=select_by_precision(float64=6, float32=-1)) - with Model(): x = PositiveNormal('x', mu=0, sd=1, transform=None) - assert np.isinf(x.logp({'x':-1})) + PositiveNormal = Bound(Normal, lower=0.0) + self.pymc3_matches_scipy( + PositiveNormal, + Rplus, + {"mu": Rplus, "sd": Rplus}, + lambda value, mu, sd: sp.norm.logpdf(value, mu, sd), + decimal=select_by_precision(float64=6, float32=-1), + ) + with Model(): + x = PositiveNormal("x", mu=0, sd=1, transform=None) + assert np.isinf(x.logp({"x": -1})) def test_discrete_unif(self): self.pymc3_matches_scipy( - DiscreteUniform, Rdunif, {'lower': -Rplusdunif, 'upper': Rplusdunif}, - lambda value, lower, upper: sp.randint.logpmf(value, lower, upper + 1)) + DiscreteUniform, + Rdunif, + {"lower": -Rplusdunif, "upper": Rplusdunif}, + lambda value, lower, upper: sp.randint.logpmf(value, lower, upper + 1), + ) def test_flat(self): self.pymc3_matches_scipy(Flat, Runif, {}, lambda value: 0) with Model(): - x = Flat('a') + x = Flat("a") assert_allclose(x.tag.test_value, 0) self.check_logcdf(Flat, Runif, {}, lambda value: np.log(0.5)) # Check infinite cases individually. - assert 0. == Flat.dist().logcdf(np.inf).tag.test_value + assert 0.0 == Flat.dist().logcdf(np.inf).tag.test_value assert -np.inf == Flat.dist().logcdf(-np.inf).tag.test_value def test_half_flat(self): self.pymc3_matches_scipy(HalfFlat, Rplus, {}, lambda value: 0) with Model(): - x = HalfFlat('a', shape=2) + x = HalfFlat("a", shape=2) assert_allclose(x.tag.test_value, 1) assert x.tag.test_value.shape == (2,) self.check_logcdf(HalfFlat, Runif, {}, lambda value: -np.inf) # Check infinite cases individually. - assert 0. == HalfFlat.dist().logcdf(np.inf).tag.test_value + assert 0.0 == HalfFlat.dist().logcdf(np.inf).tag.test_value assert -np.inf == HalfFlat.dist().logcdf(-np.inf).tag.test_value def test_normal(self): - self.pymc3_matches_scipy(Normal, R, {'mu': R, 'sd': Rplus}, - lambda value, mu, sd: sp.norm.logpdf(value, mu, sd), - decimal=select_by_precision(float64=6, float32=1) - ) - self.check_logcdf(Normal, R, {'mu': R, 'sd': Rplus}, - lambda value, mu, sd: sp.norm.logcdf(value, mu, sd)) + self.pymc3_matches_scipy( + Normal, + R, + {"mu": R, "sd": Rplus}, + lambda value, mu, sd: sp.norm.logpdf(value, mu, sd), + decimal=select_by_precision(float64=6, float32=1), + ) + self.check_logcdf( + Normal, + R, + {"mu": R, "sd": Rplus}, + lambda value, mu, sd: sp.norm.logcdf(value, mu, sd), + ) def test_truncated_normal(self): def scipy_logp(value, mu, sd, lower, upper): return sp.truncnorm.logpdf( - value, (lower-mu)/sd, (upper-mu)/sd, loc=mu, scale=sd) - - args = {'mu': array(-2.1), 'lower': array(-100.), 'upper': array(0.01), - 'sd': array(0.01)} - val = TruncatedNormal.dist(**args).logp(0.) + value, (lower - mu) / sd, (upper - mu) / sd, loc=mu, scale=sd + ) + + args = { + "mu": array(-2.1), + "lower": array(-100.0), + "upper": array(0.01), + "sd": array(0.01), + } + val = TruncatedNormal.dist(**args).logp(0.0) assert_allclose(val.eval(), scipy_logp(value=0, **args)) self.pymc3_matches_scipy( - TruncatedNormal, R, - {'mu': R, 'sd': Rplusbig, 'lower': -Rplusbig, 'upper': Rplusbig}, + TruncatedNormal, + R, + {"mu": R, "sd": Rplusbig, "lower": -Rplusbig, "upper": Rplusbig}, scipy_logp, - decimal=select_by_precision(float64=6, float32=1) + decimal=select_by_precision(float64=6, float32=1), ) def test_half_normal(self): - self.pymc3_matches_scipy(HalfNormal, Rplus, {'sd': Rplus}, - lambda value, sd: sp.halfnorm.logpdf(value, scale=sd), - decimal=select_by_precision(float64=6, float32=-1) - ) - self.check_logcdf(HalfNormal, Rplus, {'sd': Rplus}, - lambda value, sd: sp.halfnorm.logcdf(value, scale=sd)) + self.pymc3_matches_scipy( + HalfNormal, + Rplus, + {"sd": Rplus}, + lambda value, sd: sp.halfnorm.logpdf(value, scale=sd), + decimal=select_by_precision(float64=6, float32=-1), + ) + self.check_logcdf( + HalfNormal, + Rplus, + {"sd": Rplus}, + lambda value, sd: sp.halfnorm.logcdf(value, scale=sd), + ) def test_chi_squared(self): - self.pymc3_matches_scipy(ChiSquared, Rplus, {'nu': Rplusdunif}, - lambda value, nu: sp.chi2.logpdf(value, df=nu)) + self.pymc3_matches_scipy( + ChiSquared, + Rplus, + {"nu": Rplusdunif}, + lambda value, nu: sp.chi2.logpdf(value, df=nu), + ) @pytest.mark.xfail(reason="Poor CDF in SciPy. See scipy/scipy#869 for details.") def test_wald_scipy(self): - self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus}, - lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha), - decimal=select_by_precision(float64=6, float32=1) - ) - self.check_logcdf(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus}, - lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha)) - - @pytest.mark.parametrize('value,mu,lam,phi,alpha,logp', [ - (.5, .001, .5, None, 0., -124500.7257914), - (1., .5, .001, None, 0., -4.3733162), - (2., 1., None, None, 0., -2.2086593), - (5., 2., 2.5, None, 0., -3.4374500), - (7.5, 5., None, 1., 0., -3.2199074), - (15., 10., None, .75, 0., -4.0360623), - (50., 15., None, .66666, 0., -6.1801249), - (.5, .001, 0.5, None, 0., -124500.7257914), - (1., .5, .001, None, .5, -3.3330954), - (2., 1., None, None, 1., -0.9189385), - (5., 2., 2.5, None, 2., -2.2128783), - (7.5, 5., None, 1., 2.5, -2.5283764), - (15., 10., None, .75, 5., -3.3653647), - (50., 15., None, .666666, 10., -5.6481874) - ]) + self.pymc3_matches_scipy( + Wald, + Rplus, + {"mu": Rplus, "alpha": Rplus}, + lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha), + decimal=select_by_precision(float64=6, float32=1), + ) + self.check_logcdf( + Wald, + Rplus, + {"mu": Rplus, "alpha": Rplus}, + lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha), + ) + + @pytest.mark.parametrize( + "value,mu,lam,phi,alpha,logp", + [ + (0.5, 0.001, 0.5, None, 0.0, -124500.7257914), + (1.0, 0.5, 0.001, None, 0.0, -4.3733162), + (2.0, 1.0, None, None, 0.0, -2.2086593), + (5.0, 2.0, 2.5, None, 0.0, -3.4374500), + (7.5, 5.0, None, 1.0, 0.0, -3.2199074), + (15.0, 10.0, None, 0.75, 0.0, -4.0360623), + (50.0, 15.0, None, 0.66666, 0.0, -6.1801249), + (0.5, 0.001, 0.5, None, 0.0, -124500.7257914), + (1.0, 0.5, 0.001, None, 0.5, -3.3330954), + (2.0, 1.0, None, None, 1.0, -0.9189385), + (5.0, 2.0, 2.5, None, 2.0, -2.2128783), + (7.5, 5.0, None, 1.0, 2.5, -2.5283764), + (15.0, 10.0, None, 0.75, 5.0, -3.3653647), + (50.0, 15.0, None, 0.666666, 10.0, -5.6481874), + ], + ) def test_wald(self, value, mu, lam, phi, alpha, logp): # Log probabilities calculated using the dIG function from the R package gamlss. # See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or # http://www.gamlss.org/. with Model() as model: - Wald('wald', mu=mu, lam=lam, phi=phi, alpha=alpha, transform=None) - pt = {'wald': value} + Wald("wald", mu=mu, lam=lam, phi=phi, alpha=alpha, transform=None) + pt = {"wald": value} decimals = select_by_precision(float64=6, float32=1) assert_almost_equal(model.fastlogp(pt), logp, decimal=decimals, err_msg=str(pt)) def test_beta(self): - self.pymc3_matches_scipy(Beta, Unit, {'alpha': Rplus, 'beta': Rplus}, - lambda value, alpha, beta: sp.beta.logpdf(value, alpha, beta)) - self.pymc3_matches_scipy(Beta, Unit, {'mu': Unit, 'sd': Rplus}, beta_mu_sd) - self.check_logcdf(Beta, Unit, {'alpha': Rplus, 'beta': Rplus}, - lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta)) + self.pymc3_matches_scipy( + Beta, + Unit, + {"alpha": Rplus, "beta": Rplus}, + lambda value, alpha, beta: sp.beta.logpdf(value, alpha, beta), + ) + self.pymc3_matches_scipy(Beta, Unit, {"mu": Unit, "sd": Rplus}, beta_mu_sd) + self.check_logcdf( + Beta, + Unit, + {"alpha": Rplus, "beta": Rplus}, + lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta), + ) def test_kumaraswamy(self): # Scipy does not have a built-in Kumaraswamy pdf def scipy_log_pdf(value, a, b): - return np.log(a) + np.log(b) + (a - 1) * np.log(value) + (b - 1) * np.log(1 - value ** a) - self.pymc3_matches_scipy(Kumaraswamy, Unit, {'a': Rplus, 'b': Rplus}, scipy_log_pdf) + return ( + np.log(a) + + np.log(b) + + (a - 1) * np.log(value) + + (b - 1) * np.log(1 - value ** a) + ) + + self.pymc3_matches_scipy( + Kumaraswamy, Unit, {"a": Rplus, "b": Rplus}, scipy_log_pdf + ) def test_exponential(self): - self.pymc3_matches_scipy(Exponential, Rplus, {'lam': Rplus}, - lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam)) - self.check_logcdf(Exponential, Rplus, {'lam': Rplus}, - lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam)) + self.pymc3_matches_scipy( + Exponential, + Rplus, + {"lam": Rplus}, + lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam), + ) + self.check_logcdf( + Exponential, + Rplus, + {"lam": Rplus}, + lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam), + ) def test_geometric(self): - self.pymc3_matches_scipy(Geometric, Nat, {'p': Unit}, - lambda value, p: np.log(sp.geom.pmf(value, p))) + self.pymc3_matches_scipy( + Geometric, Nat, {"p": Unit}, lambda value, p: np.log(sp.geom.pmf(value, p)) + ) def test_negative_binomial(self): def test_fun(value, mu, alpha): return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha)) - self.pymc3_matches_scipy(NegativeBinomial, Nat, { - 'mu': Rplus, 'alpha': Rplus}, test_fun) + + self.pymc3_matches_scipy( + NegativeBinomial, Nat, {"mu": Rplus, "alpha": Rplus}, test_fun + ) def test_laplace(self): - self.pymc3_matches_scipy(Laplace, R, {'mu': R, 'b': Rplus}, - lambda value, mu, b: sp.laplace.logpdf(value, mu, b)) - self.check_logcdf(Laplace, R, {'mu': R, 'b': Rplus}, - lambda value, mu, b: sp.laplace.logcdf(value, mu, b)) + self.pymc3_matches_scipy( + Laplace, + R, + {"mu": R, "b": Rplus}, + lambda value, mu, b: sp.laplace.logpdf(value, mu, b), + ) + self.check_logcdf( + Laplace, + R, + {"mu": R, "b": Rplus}, + lambda value, mu, b: sp.laplace.logcdf(value, mu, b), + ) def test_lognormal(self): self.pymc3_matches_scipy( - Lognormal, Rplus, {'mu': R, 'tau': Rplusbig}, - lambda value, mu, tau: floatX(sp.lognorm.logpdf(value, tau**-.5, 0, np.exp(mu)))) - self.check_logcdf(Lognormal, Rplus, {'mu': R, 'tau': Rplusbig}, - lambda value, mu, tau: sp.lognorm.logcdf(value, tau**-.5, 0, np.exp(mu))) + Lognormal, + Rplus, + {"mu": R, "tau": Rplusbig}, + lambda value, mu, tau: floatX( + sp.lognorm.logpdf(value, tau ** -0.5, 0, np.exp(mu)) + ), + ) + self.check_logcdf( + Lognormal, + Rplus, + {"mu": R, "tau": Rplusbig}, + lambda value, mu, tau: sp.lognorm.logcdf(value, tau ** -0.5, 0, np.exp(mu)), + ) def test_t(self): - self.pymc3_matches_scipy(StudentT, R, {'nu': Rplus, 'mu': R, 'lam': Rplus}, - lambda value, nu, mu, lam: sp.t.logpdf(value, nu, mu, lam**-0.5)) - self.check_logcdf(StudentT, R, {'nu': Rplus, 'mu': R, 'lam': Rplus}, - lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam**-0.5)) + self.pymc3_matches_scipy( + StudentT, + R, + {"nu": Rplus, "mu": R, "lam": Rplus}, + lambda value, nu, mu, lam: sp.t.logpdf(value, nu, mu, lam ** -0.5), + ) + self.check_logcdf( + StudentT, + R, + {"nu": Rplus, "mu": R, "lam": Rplus}, + lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam ** -0.5), + ) def test_cauchy(self): - self.pymc3_matches_scipy(Cauchy, R, {'alpha': R, 'beta': Rplusbig}, - lambda value, alpha, beta: sp.cauchy.logpdf(value, alpha, beta)) - self.check_logcdf(Cauchy, R, {'alpha': R, 'beta': Rplusbig}, - lambda value, alpha, beta: sp.cauchy.logcdf(value, alpha, beta)) + self.pymc3_matches_scipy( + Cauchy, + R, + {"alpha": R, "beta": Rplusbig}, + lambda value, alpha, beta: sp.cauchy.logpdf(value, alpha, beta), + ) + self.check_logcdf( + Cauchy, + R, + {"alpha": R, "beta": Rplusbig}, + lambda value, alpha, beta: sp.cauchy.logcdf(value, alpha, beta), + ) def test_half_cauchy(self): - self.pymc3_matches_scipy(HalfCauchy, Rplus, {'beta': Rplusbig}, - lambda value, beta: sp.halfcauchy.logpdf(value, scale=beta)) - self.check_logcdf(HalfCauchy, Rplus, {'beta': Rplusbig}, - lambda value, beta: sp.halfcauchy.logcdf(value, scale=beta)) + self.pymc3_matches_scipy( + HalfCauchy, + Rplus, + {"beta": Rplusbig}, + lambda value, beta: sp.halfcauchy.logpdf(value, scale=beta), + ) + self.check_logcdf( + HalfCauchy, + Rplus, + {"beta": Rplusbig}, + lambda value, beta: sp.halfcauchy.logcdf(value, scale=beta), + ) def test_gamma(self): self.pymc3_matches_scipy( - Gamma, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig}, - lambda value, alpha, beta: sp.gamma.logpdf(value, alpha, scale=1.0 / beta)) + Gamma, + Rplus, + {"alpha": Rplusbig, "beta": Rplusbig}, + lambda value, alpha, beta: sp.gamma.logpdf(value, alpha, scale=1.0 / beta), + ) def test_fun(value, mu, sd): - return sp.gamma.logpdf(value, mu**2 / sd**2, scale=1.0 / (mu / sd**2)) + return sp.gamma.logpdf(value, mu ** 2 / sd ** 2, scale=1.0 / (mu / sd ** 2)) + self.pymc3_matches_scipy( - Gamma, Rplus, {'mu': Rplusbig, 'sd': Rplusbig}, test_fun) + Gamma, Rplus, {"mu": Rplusbig, "sd": Rplusbig}, test_fun + ) def test_inverse_gamma(self): self.pymc3_matches_scipy( - InverseGamma, Rplus, {'alpha': Rplus, 'beta': Rplus}, - lambda value, alpha, beta: sp.invgamma.logpdf(value, alpha, scale=beta)) + InverseGamma, + Rplus, + {"alpha": Rplus, "beta": Rplus}, + lambda value, alpha, beta: sp.invgamma.logpdf(value, alpha, scale=beta), + ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), - reason="Fails on float32 due to scaling issues") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to scaling issues", + ) def test_inverse_gamma_alt_params(self): def test_fun(value, mu, sd): alpha, beta = InverseGamma._get_alpha_beta(None, None, mu, sd) return sp.invgamma.logpdf(value, alpha, scale=beta) + self.pymc3_matches_scipy( - InverseGamma, Rplus, {'mu': Rplus, 'sd': Rplus}, test_fun) + InverseGamma, Rplus, {"mu": Rplus, "sd": Rplus}, test_fun + ) def test_pareto(self): - self.pymc3_matches_scipy(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig}, - lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m)) - self.check_logcdf(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig}, - lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m)) + self.pymc3_matches_scipy( + Pareto, + Rplus, + {"alpha": Rplusbig, "m": Rplusbig}, + lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m), + ) + self.check_logcdf( + Pareto, + Rplus, + {"alpha": Rplusbig, "m": Rplusbig}, + lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m), + ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", + ) def test_weibull(self): - self.pymc3_matches_scipy(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig}, - lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta), - ) - self.check_logcdf(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig}, - lambda value, alpha, beta: - sp.exponweib.logcdf(value, 1, alpha, scale=beta),) + self.pymc3_matches_scipy( + Weibull, + Rplus, + {"alpha": Rplusbig, "beta": Rplusbig}, + lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta), + ) + self.check_logcdf( + Weibull, + Rplus, + {"alpha": Rplusbig, "beta": Rplusbig}, + lambda value, alpha, beta: sp.exponweib.logcdf(value, 1, alpha, scale=beta), + ) def test_half_studentt(self): # this is only testing for nu=1 (halfcauchy) - self.pymc3_matches_scipy(HalfStudentT, Rplus, {'sd': Rplus}, - lambda value, sd: sp.halfcauchy.logpdf(value, 0, sd)) + self.pymc3_matches_scipy( + HalfStudentT, + Rplus, + {"sd": Rplus}, + lambda value, sd: sp.halfcauchy.logpdf(value, 0, sd), + ) def test_skew_normal(self): - self.pymc3_matches_scipy(SkewNormal, R, {'mu': R, 'sd': Rplusbig, 'alpha': R}, - lambda value, alpha, mu, sd: sp.skewnorm.logpdf(value, alpha, mu, sd)) + self.pymc3_matches_scipy( + SkewNormal, + R, + {"mu": R, "sd": Rplusbig, "alpha": R}, + lambda value, alpha, mu, sd: sp.skewnorm.logpdf(value, alpha, mu, sd), + ) def test_binomial(self): - self.pymc3_matches_scipy(Binomial, Nat, {'n': NatSmall, 'p': Unit}, - lambda value, n, p: sp.binom.logpmf(value, n, p)) + self.pymc3_matches_scipy( + Binomial, + Nat, + {"n": NatSmall, "p": Unit}, + lambda value, n, p: sp.binom.logpmf(value, n, p), + ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_beta_binomial(self): - self.checkd(BetaBinomial, Nat, {'alpha': Rplus, 'beta': Rplus, 'n': NatSmall}) + self.checkd(BetaBinomial, Nat, {"alpha": Rplus, "beta": Rplus, "n": NatSmall}) def test_bernoulli(self): self.pymc3_matches_scipy( - Bernoulli, Bool, {'logit_p': R}, - lambda value, logit_p: sp.bernoulli.logpmf(value, scipy.special.expit(logit_p))) - self.pymc3_matches_scipy(Bernoulli, Bool, {'p': Unit}, - lambda value, p: sp.bernoulli.logpmf(value, p)) - + Bernoulli, + Bool, + {"logit_p": R}, + lambda value, logit_p: sp.bernoulli.logpmf( + value, scipy.special.expit(logit_p) + ), + ) + self.pymc3_matches_scipy( + Bernoulli, Bool, {"p": Unit}, lambda value, p: sp.bernoulli.logpmf(value, p) + ) def test_discrete_weibull(self): - self.pymc3_matches_scipy(DiscreteWeibull, Nat, - {'q': Unit, 'beta': Rplusdunif}, discrete_weibull_logpmf) + self.pymc3_matches_scipy( + DiscreteWeibull, + Nat, + {"q": Unit, "beta": Rplusdunif}, + discrete_weibull_logpmf, + ) def test_poisson(self): - self.pymc3_matches_scipy(Poisson, Nat, {'mu': Rplus}, - lambda value, mu: sp.poisson.logpmf(value, mu)) + self.pymc3_matches_scipy( + Poisson, Nat, {"mu": Rplus}, lambda value, mu: sp.poisson.logpmf(value, mu) + ) def test_bound_poisson(self): - NonZeroPoisson = Bound(Poisson, lower=1.) - self.pymc3_matches_scipy(NonZeroPoisson, PosNat, {'mu': Rplus}, - lambda value, mu: sp.poisson.logpmf(value, mu)) + NonZeroPoisson = Bound(Poisson, lower=1.0) + self.pymc3_matches_scipy( + NonZeroPoisson, + PosNat, + {"mu": Rplus}, + lambda value, mu: sp.poisson.logpmf(value, mu), + ) - with Model(): x = NonZeroPoisson('x', mu=4) - assert np.isinf(x.logp({'x':0})) + with Model(): + x = NonZeroPoisson("x", mu=4) + assert np.isinf(x.logp({"x": 0})) def test_constantdist(self): - self.pymc3_matches_scipy(Constant, I, {'c': I}, - lambda value, c: np.log(c == value)) + self.pymc3_matches_scipy( + Constant, I, {"c": I}, lambda value, c: np.log(c == value) + ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_zeroinflatedpoisson(self): - self.checkd(ZeroInflatedPoisson, Nat, {'theta': Rplus, 'psi': Unit}) + self.checkd(ZeroInflatedPoisson, Nat, {"theta": Rplus, "psi": Unit}) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_zeroinflatednegativebinomial(self): - self.checkd(ZeroInflatedNegativeBinomial, Nat, - {'mu': Rplusbig, 'alpha': Rplusbig, 'psi': Unit}) + self.checkd( + ZeroInflatedNegativeBinomial, + Nat, + {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit}, + ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_zeroinflatedbinomial(self): - self.checkd(ZeroInflatedBinomial, Nat, - {'n': NatSmall, 'p': Unit, 'psi': Unit}) + self.checkd(ZeroInflatedBinomial, Nat, {"n": NatSmall, "p": Unit, "psi": Unit}) - @pytest.mark.parametrize('n', [1, 2, 3]) + @pytest.mark.parametrize("n", [1, 2, 3]) def test_mvnormal(self, n): - self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n), - {'mu': Vector(R, n), 'tau': PdMatrix(n)}, - normal_logpdf_tau) - self.pymc3_matches_scipy(MvNormal, Vector(R, n), - {'mu': Vector(R, n), 'tau': PdMatrix(n)}, - normal_logpdf_tau) - self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n), - {'mu': Vector(R, n), 'cov': PdMatrix(n)}, - normal_logpdf_cov) - self.pymc3_matches_scipy(MvNormal, Vector(R, n), - {'mu': Vector(R, n), 'cov': PdMatrix(n)}, - normal_logpdf_cov) - self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n), - {'mu': Vector(R, n), 'chol': PdMatrixChol(n)}, - normal_logpdf_chol, - decimal=select_by_precision(float64=6, float32=-1)) - self.pymc3_matches_scipy(MvNormal, Vector(R, n), - {'mu': Vector(R, n), 'chol': PdMatrixChol(n)}, - normal_logpdf_chol, - decimal=select_by_precision(float64=6, float32=0)) + self.pymc3_matches_scipy( + MvNormal, + RealMatrix(5, n), + {"mu": Vector(R, n), "tau": PdMatrix(n)}, + normal_logpdf_tau, + ) + self.pymc3_matches_scipy( + MvNormal, + Vector(R, n), + {"mu": Vector(R, n), "tau": PdMatrix(n)}, + normal_logpdf_tau, + ) + self.pymc3_matches_scipy( + MvNormal, + RealMatrix(5, n), + {"mu": Vector(R, n), "cov": PdMatrix(n)}, + normal_logpdf_cov, + ) + self.pymc3_matches_scipy( + MvNormal, + Vector(R, n), + {"mu": Vector(R, n), "cov": PdMatrix(n)}, + normal_logpdf_cov, + ) + self.pymc3_matches_scipy( + MvNormal, + RealMatrix(5, n), + {"mu": Vector(R, n), "chol": PdMatrixChol(n)}, + normal_logpdf_chol, + decimal=select_by_precision(float64=6, float32=-1), + ) + self.pymc3_matches_scipy( + MvNormal, + Vector(R, n), + {"mu": Vector(R, n), "chol": PdMatrixChol(n)}, + normal_logpdf_chol, + decimal=select_by_precision(float64=6, float32=0), + ) def MvNormalUpper(*args, **kwargs): return MvNormal(lower=False, *args, **kwargs) - self.pymc3_matches_scipy(MvNormalUpper, Vector(R, n), - {'mu': Vector(R, n), 'chol': PdMatrixCholUpper(n)}, - normal_logpdf_chol_upper, - decimal=select_by_precision(float64=6, float32=0)) + self.pymc3_matches_scipy( + MvNormalUpper, + Vector(R, n), + {"mu": Vector(R, n), "chol": PdMatrixCholUpper(n)}, + normal_logpdf_chol_upper, + decimal=select_by_precision(float64=6, float32=0), + ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", + ) def test_mvnormal_indef(self): cov_val = np.array([[1, 0.5], [0.5, -2]]) - cov = tt.matrix('cov') + cov = tt.matrix("cov") cov.tag.test_value = np.eye(2) mu = floatX(np.zeros(2)) - x = tt.vector('x') + x = tt.vector("x") x.tag.test_value = np.zeros(2) logp = MvNormal.dist(mu=mu, cov=cov).logp(x) f_logp = theano.function([cov, x], logp) @@ -845,96 +1157,153 @@ def test_mvnormal_indef(self): def test_mvnormal_init_fail(self): with Model(): with pytest.raises(ValueError): - x = MvNormal('x', mu=np.zeros(3), shape=3) + x = MvNormal("x", mu=np.zeros(3), shape=3) with pytest.raises(ValueError): - x = MvNormal('x', mu=np.zeros(3), cov=np.eye(3), tau=np.eye(3), shape=3) + x = MvNormal("x", mu=np.zeros(3), cov=np.eye(3), tau=np.eye(3), shape=3) - @pytest.mark.parametrize('n', [1, 2, 3]) + @pytest.mark.parametrize("n", [1, 2, 3]) def test_matrixnormal(self, n): mat_scale = 1e3 # To reduce logp magnitude - mean_scale = .1 - self.pymc3_matches_scipy(MatrixNormal, RealMatrix(n, n), - {'mu': RealMatrix(n, n)*mean_scale, - 'rowcov': PdMatrix(n)*mat_scale, - 'colcov': PdMatrix(n)*mat_scale}, - matrix_normal_logpdf_cov) - self.pymc3_matches_scipy(MatrixNormal, RealMatrix(2, n), - {'mu': RealMatrix(2, n)*mean_scale, - 'rowcov': PdMatrix(2)*mat_scale, - 'colcov': PdMatrix(n)*mat_scale}, - matrix_normal_logpdf_cov) - self.pymc3_matches_scipy(MatrixNormal, RealMatrix(3, n), - {'mu': RealMatrix(3, n)*mean_scale, - 'rowchol': PdMatrixChol(3)*mat_scale, - 'colchol': PdMatrixChol(n)*mat_scale}, - matrix_normal_logpdf_chol, - decimal=select_by_precision(float64=6, float32=-1)) - self.pymc3_matches_scipy(MatrixNormal, RealMatrix(n, 3), - {'mu': RealMatrix(n, 3)*mean_scale, - 'rowchol': PdMatrixChol(n)*mat_scale, - 'colchol': PdMatrixChol(3)*mat_scale}, - matrix_normal_logpdf_chol, - decimal=select_by_precision(float64=6, float32=0)) - - @pytest.mark.parametrize('n', [2, 3]) - @pytest.mark.parametrize('m', [3]) - @pytest.mark.parametrize('sigma', [None, 1.0]) + mean_scale = 0.1 + self.pymc3_matches_scipy( + MatrixNormal, + RealMatrix(n, n), + { + "mu": RealMatrix(n, n) * mean_scale, + "rowcov": PdMatrix(n) * mat_scale, + "colcov": PdMatrix(n) * mat_scale, + }, + matrix_normal_logpdf_cov, + ) + self.pymc3_matches_scipy( + MatrixNormal, + RealMatrix(2, n), + { + "mu": RealMatrix(2, n) * mean_scale, + "rowcov": PdMatrix(2) * mat_scale, + "colcov": PdMatrix(n) * mat_scale, + }, + matrix_normal_logpdf_cov, + ) + self.pymc3_matches_scipy( + MatrixNormal, + RealMatrix(3, n), + { + "mu": RealMatrix(3, n) * mean_scale, + "rowchol": PdMatrixChol(3) * mat_scale, + "colchol": PdMatrixChol(n) * mat_scale, + }, + matrix_normal_logpdf_chol, + decimal=select_by_precision(float64=6, float32=-1), + ) + self.pymc3_matches_scipy( + MatrixNormal, + RealMatrix(n, 3), + { + "mu": RealMatrix(n, 3) * mean_scale, + "rowchol": PdMatrixChol(n) * mat_scale, + "colchol": PdMatrixChol(3) * mat_scale, + }, + matrix_normal_logpdf_chol, + decimal=select_by_precision(float64=6, float32=0), + ) + + @pytest.mark.parametrize("n", [2, 3]) + @pytest.mark.parametrize("m", [3]) + @pytest.mark.parametrize("sigma", [None, 1.0]) def test_kroneckernormal(self, n, m, sigma): np.random.seed(5) - N = n*m + N = n * m covs = [RandomPdMatrix(n), RandomPdMatrix(m)] chols = list(map(np.linalg.cholesky, covs)) evds = list(map(np.linalg.eigh, covs)) - dom = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) - mu = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + dom = Domain([np.random.randn(N) * 0.1], edges=(None, None), shape=N) + mu = Domain([np.random.randn(N) * 0.1], edges=(None, None), shape=N) - std_args = {'mu': mu} - cov_args = {'covs': covs} - chol_args = {'chols': chols} - evd_args = {'evds': evds} + std_args = {"mu": mu} + cov_args = {"covs": covs} + chol_args = {"chols": chols} + evd_args = {"evds": evds} if sigma is not None and sigma != 0: - std_args['sigma'] = Domain([sigma], edges=(None, None)) + std_args["sigma"] = Domain([sigma], edges=(None, None)) else: for args in [cov_args, chol_args, evd_args]: - args['sigma'] = sigma + args["sigma"] = sigma self.pymc3_matches_scipy( - KroneckerNormal, dom, std_args, kron_normal_logpdf_cov, - extra_args=cov_args, scipy_args=cov_args) + KroneckerNormal, + dom, + std_args, + kron_normal_logpdf_cov, + extra_args=cov_args, + scipy_args=cov_args, + ) self.pymc3_matches_scipy( - KroneckerNormal, dom, std_args, kron_normal_logpdf_chol, - extra_args=chol_args, scipy_args=chol_args) + KroneckerNormal, + dom, + std_args, + kron_normal_logpdf_chol, + extra_args=chol_args, + scipy_args=chol_args, + ) self.pymc3_matches_scipy( - KroneckerNormal, dom, std_args, kron_normal_logpdf_evd, - extra_args=evd_args, scipy_args=evd_args) + KroneckerNormal, + dom, + std_args, + kron_normal_logpdf_evd, + extra_args=evd_args, + scipy_args=evd_args, + ) - dom = Domain([np.random.randn(2, N)*0.1], edges=(None, None), shape=(2, N)) + dom = Domain([np.random.randn(2, N) * 0.1], edges=(None, None), shape=(2, N)) self.pymc3_matches_scipy( - KroneckerNormal, dom, std_args, kron_normal_logpdf_cov, - extra_args=cov_args, scipy_args=cov_args) + KroneckerNormal, + dom, + std_args, + kron_normal_logpdf_cov, + extra_args=cov_args, + scipy_args=cov_args, + ) self.pymc3_matches_scipy( - KroneckerNormal, dom, std_args, kron_normal_logpdf_chol, - extra_args=chol_args, scipy_args=chol_args) + KroneckerNormal, + dom, + std_args, + kron_normal_logpdf_chol, + extra_args=chol_args, + scipy_args=chol_args, + ) self.pymc3_matches_scipy( - KroneckerNormal, dom, std_args, kron_normal_logpdf_evd, - extra_args=evd_args, scipy_args=evd_args) + KroneckerNormal, + dom, + std_args, + kron_normal_logpdf_evd, + extra_args=evd_args, + scipy_args=evd_args, + ) - @pytest.mark.parametrize('n', [1, 2]) + @pytest.mark.parametrize("n", [1, 2]) def test_mvt(self, n): - self.pymc3_matches_scipy(MvStudentT, Vector(R, n), - {'nu': Rplus, 'Sigma': PdMatrix(n), 'mu': Vector(R, n)}, - mvt_logpdf) - self.pymc3_matches_scipy(MvStudentT, RealMatrix(2, n), - {'nu': Rplus, 'Sigma': PdMatrix(n), 'mu': Vector(R, n)}, - mvt_logpdf) - - @pytest.mark.parametrize('n',[2,3,4]) - def test_AR1(self, n): - self.pymc3_matches_scipy(AR1, Vector(R, n), {'k': Unit, 'tau_e': Rplus}, AR1_logpdf) + self.pymc3_matches_scipy( + MvStudentT, + Vector(R, n), + {"nu": Rplus, "Sigma": PdMatrix(n), "mu": Vector(R, n)}, + mvt_logpdf, + ) + self.pymc3_matches_scipy( + MvStudentT, + RealMatrix(2, n), + {"nu": Rplus, "Sigma": PdMatrix(n), "mu": Vector(R, n)}, + mvt_logpdf, + ) + @pytest.mark.parametrize("n", [2, 3, 4]) + def test_AR1(self, n): + self.pymc3_matches_scipy( + AR1, Vector(R, n), {"k": Unit, "tau_e": Rplus}, AR1_logpdf + ) - @pytest.mark.parametrize('n', [2, 3]) + @pytest.mark.parametrize("n", [2, 3]) def test_wishart(self, n): # This check compares the autodiff gradient to the numdiff gradient. # However, due to the strict constraints of the wishart, @@ -945,195 +1314,236 @@ def test_wishart(self, n): # checks=[self.check_dlogp]) pass - @pytest.mark.parametrize('x,eta,n,lp', LKJ_CASES) + @pytest.mark.parametrize("x,eta,n,lp", LKJ_CASES) def test_lkj(self, x, eta, n, lp): with Model() as model: - LKJCorr('lkj', eta=eta, n=n, transform=None) + LKJCorr("lkj", eta=eta, n=n, transform=None) - pt = {'lkj': x} + pt = {"lkj": x} decimals = select_by_precision(float64=6, float32=4) assert_almost_equal(model.fastlogp(pt), lp, decimal=decimals, err_msg=str(pt)) - @pytest.mark.parametrize('n', [2, 3]) + @pytest.mark.parametrize("n", [2, 3]) def test_dirichlet(self, n): - self.pymc3_matches_scipy(Dirichlet, Simplex( - n), {'a': Vector(Rplus, n)}, dirichlet_logpdf) + self.pymc3_matches_scipy( + Dirichlet, Simplex(n), {"a": Vector(Rplus, n)}, dirichlet_logpdf + ) def test_dirichlet_2D(self): - self.pymc3_matches_scipy(Dirichlet, MultiSimplex(2, 2), - {'a': Vector(Vector(Rplus, 2), 2)}, dirichlet_logpdf) + self.pymc3_matches_scipy( + Dirichlet, + MultiSimplex(2, 2), + {"a": Vector(Vector(Rplus, 2), 2)}, + dirichlet_logpdf, + ) - @pytest.mark.parametrize('n', [2, 3]) + @pytest.mark.parametrize("n", [2, 3]) def test_multinomial(self, n): - self.pymc3_matches_scipy(Multinomial, Vector(Nat, n), {'p': Simplex(n), 'n': Nat}, - multinomial_logpdf) - - @pytest.mark.parametrize('p,n', [ - [[.25, .25, .25, .25], 1], - [[.3, .6, .05, .05], 2], - [[.3, .6, .05, .05], 10], - ]) + self.pymc3_matches_scipy( + Multinomial, Vector(Nat, n), {"p": Simplex(n), "n": Nat}, multinomial_logpdf + ) + + @pytest.mark.parametrize( + "p,n", + [ + [[0.25, 0.25, 0.25, 0.25], 1], + [[0.3, 0.6, 0.05, 0.05], 2], + [[0.3, 0.6, 0.05, 0.05], 10], + ], + ) def test_multinomial_mode(self, p, n): _p = np.array(p) with Model() as model: - m = Multinomial('m', n, _p, _p.shape) + m = Multinomial("m", n, _p, _p.shape) assert_allclose(m.distribution.mode.eval().sum(), n) _p = np.array([p, p]) with Model() as model: - m = Multinomial('m', n, _p, _p.shape) + m = Multinomial("m", n, _p, _p.shape) assert_allclose(m.distribution.mode.eval().sum(axis=-1), n) - @pytest.mark.parametrize('p, shape, n', [ - [[.25, .25, .25, .25], 4, 2], - [[.25, .25, .25, .25], (1, 4), 3], - # 3: expect to fail - # [[.25, .25, .25, .25], (10, 4)], - [[.25, .25, .25, .25], (10, 1, 4), 5], - # 5: expect to fail - # [[[.25, .25, .25, .25]], (2, 4), [7, 11]], - [[[.25, .25, .25, .25], - [.25, .25, .25, .25]], (2, 4), 13], - [[[.25, .25, .25, .25], - [.25, .25, .25, .25]], (1, 2, 4), [23, 29]], - [[[.25, .25, .25, .25], - [.25, .25, .25, .25]], (10, 2, 4), [31, 37]], - [[[.25, .25, .25, .25], - [.25, .25, .25, .25]], (2, 4), [17, 19]], - ]) + @pytest.mark.parametrize( + "p, shape, n", + [ + [[0.25, 0.25, 0.25, 0.25], 4, 2], + [[0.25, 0.25, 0.25, 0.25], (1, 4), 3], + # 3: expect to fail + # [[.25, .25, .25, .25], (10, 4)], + [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5], + # 5: expect to fail + # [[[.25, .25, .25, .25]], (2, 4), [7, 11]], + [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13], + [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]], + [ + [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], + (10, 2, 4), + [31, 37], + ], + [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]], + ], + ) def test_multinomial_random(self, p, shape, n): p = np.asarray(p) with Model() as model: - m = Multinomial('m', n=n, p=p, shape=shape) + m = Multinomial("m", n=n, p=p, shape=shape) m.random() def test_multinomial_mode_with_shape(self): n = [1, 10] - p = np.asarray([[.25,.25,.25,.25], [.26, .26, .26, .22]]) + p = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]]) with Model() as model: - m = Multinomial('m', n=n, p=p, shape=(2, 4)) + m = Multinomial("m", n=n, p=p, shape=(2, 4)) assert_allclose(m.distribution.mode.eval().sum(axis=-1), n) def test_multinomial_vec(self): - vals = np.array([[2,4,4], [3,3,4]]) + vals = np.array([[2, 4, 4], [3, 3, 4]]) p = np.array([0.2, 0.3, 0.5]) n = 10 with Model() as model_single: - Multinomial('m', n=n, p=p, shape=len(p)) + Multinomial("m", n=n, p=p, shape=len(p)) with Model() as model_many: - Multinomial('m', n=n, p=p, shape=vals.shape) + Multinomial("m", n=n, p=p, shape=vals.shape) - assert_almost_equal(scipy.stats.multinomial.logpmf(vals, n, p), - np.asarray([model_single.fastlogp({'m': val}) for val in vals]), - decimal=4) + assert_almost_equal( + scipy.stats.multinomial.logpmf(vals, n, p), + np.asarray([model_single.fastlogp({"m": val}) for val in vals]), + decimal=4, + ) - assert_almost_equal(scipy.stats.multinomial.logpmf(vals, n, p), - model_many.free_RVs[0].logp_elemwise({'m': vals}).squeeze(), - decimal=4) + assert_almost_equal( + scipy.stats.multinomial.logpmf(vals, n, p), + model_many.free_RVs[0].logp_elemwise({"m": vals}).squeeze(), + decimal=4, + ) - assert_almost_equal(sum([model_single.fastlogp({'m': val}) for val in vals]), - model_many.fastlogp({'m': vals}), - decimal=4) + assert_almost_equal( + sum([model_single.fastlogp({"m": val}) for val in vals]), + model_many.fastlogp({"m": vals}), + decimal=4, + ) def test_multinomial_vec_1d_n(self): - vals = np.array([[2,4,4], [4,3,4]]) + vals = np.array([[2, 4, 4], [4, 3, 4]]) p = np.array([0.2, 0.3, 0.5]) ns = np.array([10, 11]) with Model() as model: - Multinomial('m', n=ns, p=p, shape=vals.shape) + Multinomial("m", n=ns, p=p, shape=vals.shape) - assert_almost_equal(sum([multinomial_logpdf(val, n, p) for val, n in zip(vals, ns)]), - model.fastlogp({'m': vals}), - decimal=4) + assert_almost_equal( + sum([multinomial_logpdf(val, n, p) for val, n in zip(vals, ns)]), + model.fastlogp({"m": vals}), + decimal=4, + ) def test_multinomial_vec_1d_n_2d_p(self): - vals = np.array([[2,4,4], [4,3,4]]) - ps = np.array([[0.2, 0.3, 0.5], - [0.9, 0.09, 0.01]]) + vals = np.array([[2, 4, 4], [4, 3, 4]]) + ps = np.array([[0.2, 0.3, 0.5], [0.9, 0.09, 0.01]]) ns = np.array([10, 11]) with Model() as model: - Multinomial('m', n=ns, p=ps, shape=vals.shape) + Multinomial("m", n=ns, p=ps, shape=vals.shape) - assert_almost_equal(sum([multinomial_logpdf(val, n, p) for val, n, p in zip(vals, ns, ps)]), - model.fastlogp({'m': vals}), - decimal=4) + assert_almost_equal( + sum([multinomial_logpdf(val, n, p) for val, n, p in zip(vals, ns, ps)]), + model.fastlogp({"m": vals}), + decimal=4, + ) def test_multinomial_vec_2d_p(self): - vals = np.array([[2,4,4], [3,3,4]]) - ps = np.array([[0.2, 0.3, 0.5], - [0.3, 0.3, 0.4]]) + vals = np.array([[2, 4, 4], [3, 3, 4]]) + ps = np.array([[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]]) n = 10 with Model() as model: - Multinomial('m', n=n, p=ps, shape=vals.shape) + Multinomial("m", n=n, p=ps, shape=vals.shape) - assert_almost_equal(sum([multinomial_logpdf(val, n, p) for val, p in zip(vals, ps)]), - model.fastlogp({'m': vals}), - decimal=4) + assert_almost_equal( + sum([multinomial_logpdf(val, n, p) for val, p in zip(vals, ps)]), + model.fastlogp({"m": vals}), + decimal=4, + ) def test_categorical_bounds(self): with Model(): - x = Categorical('x', p=np.array([0.2, 0.3, 0.5])) - assert np.isinf(x.logp({'x': -1})) - assert np.isinf(x.logp({'x': 3})) + x = Categorical("x", p=np.array([0.2, 0.3, 0.5])) + assert np.isinf(x.logp({"x": -1})) + assert np.isinf(x.logp({"x": 3})) - @pytest.mark.parametrize('n', [2, 3, 4]) + @pytest.mark.parametrize("n", [2, 3, 4]) def test_categorical(self, n): - self.pymc3_matches_scipy(Categorical, Domain(range(n), 'int64'), {'p': Simplex(n)}, - lambda value, p: categorical_logpdf(value, p)) + self.pymc3_matches_scipy( + Categorical, + Domain(range(n), "int64"), + {"p": Simplex(n)}, + lambda value, p: categorical_logpdf(value, p), + ) - @pytest.mark.parametrize('n', [2, 3, 4]) + @pytest.mark.parametrize("n", [2, 3, 4]) def test_orderedlogistic(self, n): - self.pymc3_matches_scipy(OrderedLogistic, Domain(range(n), 'int64'), - {'eta': R, 'cutpoints': Vector(R, n-1)}, - lambda value, eta, cutpoints: orderedlogistic_logpdf(value, eta, cutpoints)) + self.pymc3_matches_scipy( + OrderedLogistic, + Domain(range(n), "int64"), + {"eta": R, "cutpoints": Vector(R, n - 1)}, + lambda value, eta, cutpoints: orderedlogistic_logpdf(value, eta, cutpoints), + ) def test_densitydist(self): def logp(x): - return -log(2 * .5) - abs(x - .5) / .5 - self.checkd(DensityDist, R, {}, extra_args={'logp': logp}) + return -log(2 * 0.5) - abs(x - 0.5) / 0.5 + + self.checkd(DensityDist, R, {}, extra_args={"logp": logp}) def test_addpotential(self): with Model() as model: - value = Normal('value', 1, 1) - Potential('value_squared', -value ** 2) + value = Normal("value", 1, 1) + Potential("value_squared", -value ** 2) self.check_dlogp(model, value, R, {}) def test_get_tau_sd(self): sd = np.array([2]) - assert_almost_equal(continuous.get_tau_sd(sd=sd), [1. / sd**2, sd]) - - @pytest.mark.parametrize('value,mu,sigma,nu,logp', [ - (0.5, -50.000, 0.500, 0.500, -99.8068528), - (1.0, -1.000, 0.001, 0.001, -1992.5922447), - (2.0, 0.001, 1.000, 1.000, -1.6720416), - (5.0, 0.500, 2.500, 2.500, -2.4543644), - (7.5, 2.000, 5.000, 5.000, -2.8259429), - (15.0, 5.000, 7.500, 7.500, -3.3093854), - (50.0, 50.000, 10.000, 10.000, -3.6436067), - (1000.0, 500.000, 10.000, 20.000, -27.8707323) - ]) + assert_almost_equal(continuous.get_tau_sd(sd=sd), [1.0 / sd ** 2, sd]) + + @pytest.mark.parametrize( + "value,mu,sigma,nu,logp", + [ + (0.5, -50.000, 0.500, 0.500, -99.8068528), + (1.0, -1.000, 0.001, 0.001, -1992.5922447), + (2.0, 0.001, 1.000, 1.000, -1.6720416), + (5.0, 0.500, 2.500, 2.500, -2.4543644), + (7.5, 2.000, 5.000, 5.000, -2.8259429), + (15.0, 5.000, 7.500, 7.500, -3.3093854), + (50.0, 50.000, 10.000, 10.000, -3.6436067), + (1000.0, 500.000, 10.000, 20.000, -27.8707323), + ], + ) def test_ex_gaussian(self, value, mu, sigma, nu, logp): """Log probabilities calculated using the dexGAUS function from the R package gamlss. See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or http://www.gamlss.org/.""" with Model() as model: - ExGaussian('eg', mu=mu, sigma=sigma, nu=nu) - pt = {'eg': value} - assert_almost_equal(model.fastlogp(pt), logp, decimal=select_by_precision(float64=6, float32=2), err_msg=str(pt)) - - @pytest.mark.parametrize('value,mu,sigma,nu,logcdf', [ - (0.5, -50.000, 0.500, 0.500, 0.0000000), - (1.0, -1.000, 0.001, 0.001, 0.0000000), - (2.0, 0.001, 1.000, 1.000, -0.2365674), - (5.0, 0.500, 2.500, 2.500, -0.2886489), - (7.5, 2.000, 5.000, 5.000, -0.5655104), - (15.0, 5.000, 7.500, 7.500, -0.4545255), - (50.0, 50.000, 10.000, 10.000, -1.433714), - (1000.0, 500.000, 10.000, 20.000, -1.573708e-11), - ]) + ExGaussian("eg", mu=mu, sigma=sigma, nu=nu) + pt = {"eg": value} + assert_almost_equal( + model.fastlogp(pt), + logp, + decimal=select_by_precision(float64=6, float32=2), + err_msg=str(pt), + ) + + @pytest.mark.parametrize( + "value,mu,sigma,nu,logcdf", + [ + (0.5, -50.000, 0.500, 0.500, 0.0000000), + (1.0, -1.000, 0.001, 0.001, 0.0000000), + (2.0, 0.001, 1.000, 1.000, -0.2365674), + (5.0, 0.500, 2.500, 2.500, -0.2886489), + (7.5, 2.000, 5.000, 5.000, -0.5655104), + (15.0, 5.000, 7.500, 7.500, -0.4545255), + (50.0, 50.000, 10.000, 10.000, -1.433714), + (1000.0, 500.000, 10.000, 20.000, -1.573708e-11), + ], + ) def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf): """Log probabilities calculated using the pexGAUS function from the R package gamlss. See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or http://www.gamlss.org/.""" @@ -1141,68 +1551,94 @@ def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf): ExGaussian.dist(mu=mu, sigma=sigma, nu=nu).logcdf(value).tag.test_value, logcdf, decimal=select_by_precision(float64=6, float32=2), - err_msg=str((value, mu, sigma, nu, logcdf))) + err_msg=str((value, mu, sigma, nu, logcdf)), + ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_vonmises(self): self.pymc3_matches_scipy( - VonMises, R, {'mu': Circ, 'kappa': Rplus}, - lambda value, mu, kappa: floatX(sp.vonmises.logpdf(value, kappa, loc=mu))) + VonMises, + R, + {"mu": Circ, "kappa": Rplus}, + lambda value, mu, kappa: floatX(sp.vonmises.logpdf(value, kappa, loc=mu)), + ) def test_gumbel(self): def gumbel(value, mu, beta): return floatX(sp.gumbel_r.logpdf(value, loc=mu, scale=beta)) - self.pymc3_matches_scipy(Gumbel, R, {'mu': R, 'beta': Rplusbig}, gumbel) + + self.pymc3_matches_scipy(Gumbel, R, {"mu": R, "beta": Rplusbig}, gumbel) def gumbellcdf(value, mu, beta): return floatX(sp.gumbel_r.logcdf(value, loc=mu, scale=beta)) - self.check_logcdf(Gumbel, R, {'mu': R, 'beta': Rplusbig}, gumbellcdf) + + self.check_logcdf(Gumbel, R, {"mu": R, "beta": Rplusbig}, gumbellcdf) def test_logistic(self): - self.pymc3_matches_scipy(Logistic, R, {'mu': R, 's': Rplus}, - lambda value, mu, s: sp.logistic.logpdf(value, mu, s), - decimal=select_by_precision(float64=6, float32=1)) - self.check_logcdf(Logistic, R, {'mu': R, 's': Rplus}, - lambda value, mu, s: sp.logistic.logcdf(value, mu, s), - decimal=select_by_precision(float64=6, float32=1)) + self.pymc3_matches_scipy( + Logistic, + R, + {"mu": R, "s": Rplus}, + lambda value, mu, s: sp.logistic.logpdf(value, mu, s), + decimal=select_by_precision(float64=6, float32=1), + ) + self.check_logcdf( + Logistic, + R, + {"mu": R, "s": Rplus}, + lambda value, mu, s: sp.logistic.logcdf(value, mu, s), + decimal=select_by_precision(float64=6, float32=1), + ) def test_logitnormal(self): - self.pymc3_matches_scipy(LogitNormal, Unit, {'mu': R, 'sd': Rplus}, - lambda value, mu, sd: (sp.norm.logpdf(logit(value), mu, sd) - - (np.log(value) + np.log1p(-value))), - decimal=select_by_precision(float64=6, float32=1)) + self.pymc3_matches_scipy( + LogitNormal, + Unit, + {"mu": R, "sd": Rplus}, + lambda value, mu, sd: ( + sp.norm.logpdf(logit(value), mu, sd) + - (np.log(value) + np.log1p(-value)) + ), + decimal=select_by_precision(float64=6, float32=1), + ) def test_multidimensional_beta_construction(self): with Model(): - Beta('beta', alpha=1., beta=1., shape=(10, 20)) + Beta("beta", alpha=1.0, beta=1.0, shape=(10, 20)) def test_rice(self): - self.pymc3_matches_scipy(Rice, Rplus, {'nu': Rplus, 'sd': Rplusbig}, - lambda value, nu, sd: sp.rice.logpdf(value, b=nu, loc=0, scale=sd)) + self.pymc3_matches_scipy( + Rice, + Rplus, + {"nu": Rplus, "sd": Rplusbig}, + lambda value, nu, sd: sp.rice.logpdf(value, b=nu, loc=0, scale=sd), + ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_interpolated(self): for mu in R.vals: for sd in Rplus.vals: - #pylint: disable=cell-var-from-loop + # pylint: disable=cell-var-from-loop xmin = mu - 5 * sd xmax = mu + 5 * sd - class TestedInterpolated (Interpolated): + class TestedInterpolated(Interpolated): def __init__(self, **kwargs): x_points = np.linspace(xmin, xmax, 100000) pdf_points = sp.norm.pdf(x_points, loc=mu, scale=sd) super(TestedInterpolated, self).__init__( - x_points=x_points, - pdf_points=pdf_points, - **kwargs + x_points=x_points, pdf_points=pdf_points, **kwargs ) def ref_pdf(value): return np.where( np.logical_and(value >= xmin, value <= xmax), sp.norm.logpdf(value, mu, sd), - -np.inf * np.ones(value.shape) + -np.inf * np.ones(value.shape), ) self.pymc3_matches_scipy(TestedInterpolated, R, {}, ref_pdf) @@ -1213,7 +1649,7 @@ def test_bound(): UnboundNormal = Bound(Normal) dist = UnboundNormal.dist(mu=0, sd=1) assert dist.transform is None - assert dist.default() == 0. + assert dist.default() == 0.0 assert isinstance(dist.random(), np.ndarray) LowerNormal = Bound(Normal, lower=1) @@ -1237,13 +1673,13 @@ def test_bound(): assert dist.transform is not None with pytest.raises(ValueError) as err: dist.random() - err.match('Drawing samples from distributions with array-valued') + err.match("Drawing samples from distributions with array-valued") with Model(): - a = ArrayNormal('c', shape=2) + a = ArrayNormal("c", shape=2) assert_equal(a.tag.test_value, np.array([1.5, 2.5])) - lower = tt.vector('lower') + lower = tt.vector("lower") lower.tag.test_value = np.array([1, 2]).astype(theano.config.floatX) upper = 3 ArrayNormal = Bound(Normal, lower=lower, upper=upper) @@ -1254,7 +1690,7 @@ def test_bound(): assert dist.transform is not None with Model(): - a = ArrayNormal('c', shape=2) + a = ArrayNormal("c", shape=2) assert_equal(a.tag.test_value, np.array([2, 2.5])) rand = Bound(Binomial, lower=10).dist(n=20, p=0.3).random() @@ -1275,7 +1711,6 @@ def test_bound(): class TestLatex(object): - def setup_class(self): # True parameter values alpha, sigma = 1, 1 @@ -1288,25 +1723,25 @@ def setup_class(self): X = np.random.normal(size=(size, 2)).dot(np.array([[1, 0], [0, 0.2]])) # Simulate outcome variable - Y = alpha + X.dot(beta) + np.random.randn(size)*sigma + Y = alpha + X.dot(beta) + np.random.randn(size) * sigma with Model() as self.model: # Priors for unknown model parameters - alpha = Normal('alpha', mu=0, sd=10) - b = Normal('beta', mu=0, sd=10, shape=(2,), observed=beta) - sigma = HalfNormal('sigma', sd=1) + alpha = Normal("alpha", mu=0, sd=10) + b = Normal("beta", mu=0, sd=10, shape=(2,), observed=beta) + sigma = HalfNormal("sigma", sd=1) # Expected value of outcome - mu = Deterministic('mu', alpha + tt.dot(X, b)) + mu = Deterministic("mu", alpha + tt.dot(X, b)) # Likelihood (sampling distribution) of observations - Y_obs = Normal('Y_obs', mu=mu, sd=sigma, observed=Y) + Y_obs = Normal("Y_obs", mu=mu, sd=sigma, observed=Y) self.distributions = [alpha, sigma, mu, b, Y_obs] self.expected = ( - r'$\text{alpha} \sim \text{Normal}(\mathit{mu}=0,~\mathit{sd}=10.0)$', - r'$\text{sigma} \sim \text{HalfNormal}(\mathit{sd}=1.0)$', - r'$\text{mu} \sim \text{Deterministic}(\text{alpha},~\text{Constant},~\text{beta})$', - r'$\text{beta} \sim \text{Normal}(\mathit{mu}=0,~\mathit{sd}=10.0)$', - r'$\text{Y_obs} \sim \text{Normal}(\mathit{mu}=\text{mu},~\mathit{sd}=f(\text{sigma}))$' + r"$\text{alpha} \sim \text{Normal}(\mathit{mu}=0,~\mathit{sd}=10.0)$", + r"$\text{sigma} \sim \text{HalfNormal}(\mathit{sd}=1.0)$", + r"$\text{mu} \sim \text{Deterministic}(\text{alpha},~\text{Constant},~\text{beta})$", + r"$\text{beta} \sim \text{Normal}(\mathit{mu}=0,~\mathit{sd}=10.0)$", + r"$\text{Y_obs} \sim \text{Normal}(\mathit{mu}=\text{mu},~\mathit{sd}=f(\text{sigma}))$", ) def test__repr_latex_(self): @@ -1316,7 +1751,7 @@ def test__repr_latex_(self): model_tex = self.model._repr_latex_() for tex in self.expected: # make sure each variable is in the model - for segment in tex.strip('$').split(r'\sim'): + for segment in tex.strip("$").split(r"\sim"): assert segment in model_tex def test___latex__(self): @@ -1327,9 +1762,9 @@ def test___latex__(self): def test_discrete_trafo(): with pytest.raises(ValueError) as err: - Binomial.dist(n=5, p=0.5, transform='log') - err.match('Transformations for discrete distributions') + Binomial.dist(n=5, p=0.5, transform="log") + err.match("Transformations for discrete distributions") with Model(): with pytest.raises(ValueError) as err: - Binomial('a', n=5, p=0.5, transform='log') - err.match('Transformations for discrete distributions') + Binomial("a", n=5, p=0.5, transform="log") + err.match("Transformations for discrete distributions") diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 9bc2841624..6a15f81aed 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -13,15 +13,38 @@ from pymc3.distributions.distribution import draw_values from .helpers import SeededTest from .test_distributions import ( - build_model, Domain, product, R, Rplus, Rplusbig, Rplusdunif, - Unit, Nat, NatSmall, I, Simplex, Vector, PdMatrix, - PdMatrixChol, PdMatrixCholUpper, RealMatrix, RandomPdMatrix + build_model, + Domain, + product, + R, + Rplus, + Rplusbig, + Rplusdunif, + Unit, + Nat, + NatSmall, + I, + Simplex, + Vector, + PdMatrix, + PdMatrixChol, + PdMatrixCholUpper, + RealMatrix, + RandomPdMatrix, ) -def pymc3_random(dist, paramdomains, ref_rand, valuedomain=Domain([0]), - size=10000, alpha=0.05, fails=10, extra_args=None, - model_args=None): +def pymc3_random( + dist, + paramdomains, + ref_rand, + valuedomain=Domain([0]), + size=10000, + alpha=0.05, + fails=10, + extra_args=None, + model_args=None, +): if model_args is None: model_args = {} model = build_model(dist, valuedomain, paramdomains, extra_args) @@ -34,17 +57,22 @@ def pymc3_random(dist, paramdomains, ref_rand, valuedomain=Domain([0]), # a certain number of times. Crude, but necessary. f = fails while p <= alpha and f > 0: - s0 = model.named_vars['value'].random(size=size, point=pt) + s0 = model.named_vars["value"].random(size=size, point=pt) s1 = ref_rand(size=size, **pt) - _, p = st.ks_2samp(np.atleast_1d(s0).flatten(), - np.atleast_1d(s1).flatten()) + _, p = st.ks_2samp(np.atleast_1d(s0).flatten(), np.atleast_1d(s1).flatten()) f -= 1 assert p > alpha, str(pt) -def pymc3_random_discrete(dist, paramdomains, - valuedomain=Domain([0]), ref_rand=None, - size=100000, alpha=0.05, fails=20): +def pymc3_random_discrete( + dist, + paramdomains, + valuedomain=Domain([0]), + ref_rand=None, + size=100000, + alpha=0.05, + fails=20, +): model = build_model(dist, valuedomain, paramdomains) domains = paramdomains.copy() for pt in product(domains, n_samples=100): @@ -54,7 +82,7 @@ def pymc3_random_discrete(dist, paramdomains, # a certain number of times. f = fails while p <= alpha and f > 0: - o = model.named_vars['value'].random(size=size, point=pt) + o = model.named_vars["value"].random(size=size, point=pt) e = ref_rand(size=size, **pt) o = np.atleast_1d(o).flatten() e = np.atleast_1d(e).flatten() @@ -64,7 +92,7 @@ def pymc3_random_discrete(dist, paramdomains, expected[e] = (observed.get(e, 0), expected[e]) k = np.array([v for v in expected.values()]) if np.all(k[:, 0] == k[:, 1]): - p = 1. + p = 1.0 else: _, p = st.chisquare(k[:, 0], k[:, 1]) f -= 1 @@ -74,23 +102,23 @@ def pymc3_random_discrete(dist, paramdomains, class TestDrawValues(SeededTest): def test_draw_scalar_parameters(self): with pm.Model(): - y = pm.Normal('y1', mu=0., sd=1.) + y = pm.Normal("y1", mu=0.0, sd=1.0) mu, tau = draw_values([y.distribution.mu, y.distribution.tau]) npt.assert_almost_equal(mu, 0) npt.assert_almost_equal(tau, 1) def test_draw_dependencies(self): with pm.Model(): - x = pm.Normal('x', mu=0., sd=1.) - exp_x = pm.Deterministic('exp_x', pm.math.exp(x)) + x = pm.Normal("x", mu=0.0, sd=1.0) + exp_x = pm.Deterministic("exp_x", pm.math.exp(x)) x, exp_x = draw_values([x, exp_x]) npt.assert_almost_equal(np.exp(x), exp_x) def test_draw_order(self): with pm.Model(): - x = pm.Normal('x', mu=0., sd=1.) - exp_x = pm.Deterministic('exp_x', pm.math.exp(x)) + x = pm.Normal("x", mu=0.0, sd=1.0) + exp_x = pm.Deterministic("exp_x", pm.math.exp(x)) # Need to draw x before drawing log_x exp_x, x = draw_values([exp_x, x]) @@ -98,19 +126,20 @@ def test_draw_order(self): def test_draw_point_replacement(self): with pm.Model(): - mu = pm.Normal('mu', mu=0., tau=1e-3) - sigma = pm.Gamma('sigma', alpha=1., beta=1., transform=None) - y = pm.Normal('y', mu=mu, sd=sigma) - mu2, tau2 = draw_values([y.distribution.mu, y.distribution.tau], - point={'mu': 5., 'sigma': 2.}) + mu = pm.Normal("mu", mu=0.0, tau=1e-3) + sigma = pm.Gamma("sigma", alpha=1.0, beta=1.0, transform=None) + y = pm.Normal("y", mu=mu, sd=sigma) + mu2, tau2 = draw_values( + [y.distribution.mu, y.distribution.tau], point={"mu": 5.0, "sigma": 2.0} + ) npt.assert_almost_equal(mu2, 5) - npt.assert_almost_equal(tau2, 1 / 2.**2) + npt.assert_almost_equal(tau2, 1 / 2.0 ** 2) def test_random_sample_returns_nd_array(self): with pm.Model(): - mu = pm.Normal('mu', mu=0., tau=1e-3) - sigma = pm.Gamma('sigma', alpha=1., beta=1., transform=None) - y = pm.Normal('y', mu=mu, sd=sigma) + mu = pm.Normal("mu", mu=0.0, tau=1e-3) + sigma = pm.Gamma("sigma", alpha=1.0, beta=1.0, transform=None) + y = pm.Normal("y", mu=mu, sd=sigma) mu, tau = draw_values([y.distribution.mu, y.distribution.tau]) assert isinstance(mu, np.ndarray) assert isinstance(tau, np.ndarray) @@ -126,8 +155,10 @@ def setup_method(self, *args, **kwargs): def get_random_variable(self, shape, with_vector_params=False, name=None): if with_vector_params: - params = {key: value * np.ones(self.shape, dtype=np.dtype(type(value))) for - key, value in self.params.items()} + params = { + key: value * np.ones(self.shape, dtype=np.dtype(type(value))) + for key, value in self.params.items() + } else: params = self.params if name is None: @@ -136,7 +167,9 @@ def get_random_variable(self, shape, with_vector_params=False, name=None): if shape is None: return self.distribution(name, transform=None, **params) else: - return self.distribution(name, shape=shape, transform=None, **params) + return self.distribution( + name, shape=shape, transform=None, **params + ) @staticmethod def sample_random_variable(random_variable, size): @@ -145,17 +178,17 @@ def sample_random_variable(random_variable, size): except AttributeError: return random_variable.distribution.random(size=size) - @pytest.mark.parametrize('size', [None, 5, (4, 5)], ids=str) + @pytest.mark.parametrize("size", [None, 5, (4, 5)], ids=str) def test_scalar_parameter_shape(self, size): rv = self.get_random_variable(None) if size is None: - expected = 1, + expected = (1,) else: expected = np.atleast_1d(size).tolist() actual = np.atleast_1d(self.sample_random_variable(rv, size)).shape assert tuple(expected) == actual - @pytest.mark.parametrize('size', [None, 5, (4, 5)], ids=str) + @pytest.mark.parametrize("size", [None, 5, (4, 5)], ids=str) def test_scalar_shape(self, size): shape = 10 rv = self.get_random_variable(shape) @@ -168,7 +201,7 @@ def test_scalar_shape(self, size): actual = np.atleast_1d(self.sample_random_variable(rv, size)).shape assert tuple(expected) == actual - @pytest.mark.parametrize('size', [None, 5, (4, 5)], ids=str) + @pytest.mark.parametrize("size", [None, 5, (4, 5)], ids=str) def test_parameters_1d_shape(self, size): rv = self.get_random_variable(self.shape, with_vector_params=True) if size is None: @@ -179,7 +212,7 @@ def test_parameters_1d_shape(self, size): actual = self.sample_random_variable(rv, size).shape assert tuple(expected) == actual - @pytest.mark.parametrize('size', [None, 5, (4, 5)], ids=str) + @pytest.mark.parametrize("size", [None, 5, (4, 5)], ids=str) def test_broadcast_shape(self, size): broadcast_shape = (2 * self.shape, self.shape) rv = self.get_random_variable(broadcast_shape, with_vector_params=True) @@ -191,11 +224,13 @@ def test_broadcast_shape(self, size): actual = np.atleast_1d(self.sample_random_variable(rv, size)).shape assert tuple(expected) == actual - @pytest.mark.parametrize('shape', [(), (1,), (1, 1), (1, 2), (10, 10, 1), (10, 10, 2)], ids=str) + @pytest.mark.parametrize( + "shape", [(), (1,), (1, 1), (1, 2), (10, 10, 1), (10, 10, 2)], ids=str + ) def test_different_shapes_and_sample_sizes(self, shape): prefix = self.distribution.__name__ - rv = self.get_random_variable(shape, name='%s_%s' % (prefix, shape)) + rv = self.get_random_variable(shape, name="%s_%s" % (prefix, shape)) for size in (None, 1, 5, (4, 5)): if size is None: s = [] @@ -215,191 +250,199 @@ def test_different_shapes_and_sample_sizes(self, shape): class TestNormal(BaseTestCases.BaseTestCase): distribution = pm.Normal - params = {'mu': 0., 'tau': 1.} + params = {"mu": 0.0, "tau": 1.0} + class TestTruncatedNormal(BaseTestCases.BaseTestCase): distribution = pm.TruncatedNormal - params = {'mu': 0., 'tau': 1., 'lower':-0.5, 'upper':0.5} + params = {"mu": 0.0, "tau": 1.0, "lower": -0.5, "upper": 0.5} + class TestSkewNormal(BaseTestCases.BaseTestCase): distribution = pm.SkewNormal - params = {'mu': 0., 'sd': 1., 'alpha': 5.} + params = {"mu": 0.0, "sd": 1.0, "alpha": 5.0} class TestHalfNormal(BaseTestCases.BaseTestCase): distribution = pm.HalfNormal - params = {'tau': 1.} + params = {"tau": 1.0} class TestUniform(BaseTestCases.BaseTestCase): distribution = pm.Uniform - params = {'lower': 0., 'upper': 1.} + params = {"lower": 0.0, "upper": 1.0} class TestTriangular(BaseTestCases.BaseTestCase): distribution = pm.Triangular - params = {'c': 0.5, 'lower': 0., 'upper': 1.} + params = {"c": 0.5, "lower": 0.0, "upper": 1.0} class TestWald(BaseTestCases.BaseTestCase): distribution = pm.Wald - params = {'mu': 1., 'lam': 1., 'alpha': 0.} + params = {"mu": 1.0, "lam": 1.0, "alpha": 0.0} class TestBeta(BaseTestCases.BaseTestCase): distribution = pm.Beta - params = {'alpha': 1., 'beta': 1.} + params = {"alpha": 1.0, "beta": 1.0} class TestKumaraswamy(BaseTestCases.BaseTestCase): distribution = pm.Kumaraswamy - params = {'a': 1., 'b': 1.} + params = {"a": 1.0, "b": 1.0} class TestExponential(BaseTestCases.BaseTestCase): distribution = pm.Exponential - params = {'lam': 1.} + params = {"lam": 1.0} class TestLaplace(BaseTestCases.BaseTestCase): distribution = pm.Laplace - params = {'mu': 1., 'b': 1.} + params = {"mu": 1.0, "b": 1.0} class TestLognormal(BaseTestCases.BaseTestCase): distribution = pm.Lognormal - params = {'mu': 1., 'tau': 1.} + params = {"mu": 1.0, "tau": 1.0} class TestStudentT(BaseTestCases.BaseTestCase): distribution = pm.StudentT - params = {'nu': 5., 'mu': 0., 'lam': 1.} + params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} class TestPareto(BaseTestCases.BaseTestCase): distribution = pm.Pareto - params = {'alpha': 0.5, 'm': 1.} + params = {"alpha": 0.5, "m": 1.0} class TestCauchy(BaseTestCases.BaseTestCase): distribution = pm.Cauchy - params = {'alpha': 1., 'beta': 1.} + params = {"alpha": 1.0, "beta": 1.0} class TestHalfCauchy(BaseTestCases.BaseTestCase): distribution = pm.HalfCauchy - params = {'beta': 1.} + params = {"beta": 1.0} class TestGamma(BaseTestCases.BaseTestCase): distribution = pm.Gamma - params = {'alpha': 1., 'beta': 1.} + params = {"alpha": 1.0, "beta": 1.0} class TestInverseGamma(BaseTestCases.BaseTestCase): distribution = pm.InverseGamma - params = {'alpha': 0.5, 'beta': 0.5} + params = {"alpha": 0.5, "beta": 0.5} class TestChiSquared(BaseTestCases.BaseTestCase): distribution = pm.ChiSquared - params = {'nu': 2.} + params = {"nu": 2.0} class TestWeibull(BaseTestCases.BaseTestCase): distribution = pm.Weibull - params = {'alpha': 1., 'beta': 1.} + params = {"alpha": 1.0, "beta": 1.0} class TestExGaussian(BaseTestCases.BaseTestCase): distribution = pm.ExGaussian - params = {'mu': 0., 'sigma': 1., 'nu': 1.} + params = {"mu": 0.0, "sigma": 1.0, "nu": 1.0} class TestVonMises(BaseTestCases.BaseTestCase): distribution = pm.VonMises - params = {'mu': 0., 'kappa': 1.} + params = {"mu": 0.0, "kappa": 1.0} class TestGumbel(BaseTestCases.BaseTestCase): distribution = pm.Gumbel - params = {'mu': 0., 'beta': 1.} + params = {"mu": 0.0, "beta": 1.0} class TestLogistic(BaseTestCases.BaseTestCase): distribution = pm.Logistic - params = {'mu': 0., 's': 1.} + params = {"mu": 0.0, "s": 1.0} class TestLogitNormal(BaseTestCases.BaseTestCase): distribution = pm.LogitNormal - params = {'mu': 0., 'sd': 1.} + params = {"mu": 0.0, "sd": 1.0} class TestBinomial(BaseTestCases.BaseTestCase): distribution = pm.Binomial - params = {'n': 5, 'p': 0.5} + params = {"n": 5, "p": 0.5} class TestBetaBinomial(BaseTestCases.BaseTestCase): distribution = pm.BetaBinomial - params = {'n': 5, 'alpha': 1., 'beta': 1.} + params = {"n": 5, "alpha": 1.0, "beta": 1.0} class TestBernoulli(BaseTestCases.BaseTestCase): distribution = pm.Bernoulli - params = {'p': 0.5} + params = {"p": 0.5} class TestDiscreteWeibull(BaseTestCases.BaseTestCase): distribution = pm.DiscreteWeibull - params = {'q': 0.25, 'beta': 2.} + params = {"q": 0.25, "beta": 2.0} class TestPoisson(BaseTestCases.BaseTestCase): distribution = pm.Poisson - params = {'mu': 1.} + params = {"mu": 1.0} class TestNegativeBinomial(BaseTestCases.BaseTestCase): distribution = pm.NegativeBinomial - params = {'mu': 1., 'alpha': 1.} + params = {"mu": 1.0, "alpha": 1.0} class TestConstant(BaseTestCases.BaseTestCase): distribution = pm.Constant - params = {'c': 3} + params = {"c": 3} class TestZeroInflatedPoisson(BaseTestCases.BaseTestCase): distribution = pm.ZeroInflatedPoisson - params = {'theta': 1., 'psi': 0.3} + params = {"theta": 1.0, "psi": 0.3} class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase): distribution = pm.ZeroInflatedNegativeBinomial - params = {'mu': 1., 'alpha': 1., 'psi': 0.3} + params = {"mu": 1.0, "alpha": 1.0, "psi": 0.3} + class TestZeroInflatedBinomial(BaseTestCases.BaseTestCase): distribution = pm.ZeroInflatedBinomial - params = {'n': 10, 'p': 0.6, 'psi': 0.3} + params = {"n": 10, "p": 0.6, "psi": 0.3} + class TestDiscreteUniform(BaseTestCases.BaseTestCase): distribution = pm.DiscreteUniform - params = {'lower': 0., 'upper': 10.} + params = {"lower": 0.0, "upper": 10.0} class TestGeometric(BaseTestCases.BaseTestCase): distribution = pm.Geometric - params = {'p': 0.5} + params = {"p": 0.5} class TestCategorical(BaseTestCases.BaseTestCase): distribution = pm.Categorical - params = {'p': np.ones(BaseTestCases.BaseTestCase.shape)} + params = {"p": np.ones(BaseTestCases.BaseTestCase.shape)} - def get_random_variable(self, shape, with_vector_params=False, **kwargs): # don't transform categories - return super(TestCategorical, self).get_random_variable(shape, with_vector_params=False, **kwargs) + def get_random_variable( + self, shape, with_vector_params=False, **kwargs + ): # don't transform categories + return super(TestCategorical, self).get_random_variable( + shape, with_vector_params=False, **kwargs + ) def test_probability_vector_shape(self): """Check that if a 2d array of probabilities are passed to categorical correct shape is returned""" @@ -414,173 +457,235 @@ def test_bounded(self): def ref_rand(size, tau): return -st.halfnorm.rvs(size=size, loc=0, scale=tau ** -0.5) - pymc3_random(BoundedNormal, {'tau': Rplus}, ref_rand=ref_rand) + + pymc3_random(BoundedNormal, {"tau": Rplus}, ref_rand=ref_rand) def test_uniform(self): def ref_rand(size, lower, upper): return st.uniform.rvs(size=size, loc=lower, scale=upper - lower) - pymc3_random(pm.Uniform, {'lower': -Rplus, 'upper': Rplus}, ref_rand=ref_rand) + pymc3_random(pm.Uniform, {"lower": -Rplus, "upper": Rplus}, ref_rand=ref_rand) def test_normal(self): def ref_rand(size, mu, sd): return st.norm.rvs(size=size, loc=mu, scale=sd) - pymc3_random(pm.Normal, {'mu': R, 'sd': Rplus}, ref_rand=ref_rand) + + pymc3_random(pm.Normal, {"mu": R, "sd": Rplus}, ref_rand=ref_rand) def test_truncated_normal(self): def ref_rand(size, mu, sd, lower, upper): - return st.truncnorm.rvs((lower-mu)/sd, (upper-mu)/sd, size=size, loc=mu, scale=sd) - pymc3_random(pm.TruncatedNormal, {'mu': R, 'sd': Rplusbig, 'lower':-Rplusbig, 'upper':Rplusbig}, - ref_rand=ref_rand) + return st.truncnorm.rvs( + (lower - mu) / sd, (upper - mu) / sd, size=size, loc=mu, scale=sd + ) + + pymc3_random( + pm.TruncatedNormal, + {"mu": R, "sd": Rplusbig, "lower": -Rplusbig, "upper": Rplusbig}, + ref_rand=ref_rand, + ) def test_skew_normal(self): def ref_rand(size, alpha, mu, sd): return st.skewnorm.rvs(size=size, a=alpha, loc=mu, scale=sd) - pymc3_random(pm.SkewNormal, {'mu': R, 'sd': Rplus, 'alpha': R}, ref_rand=ref_rand) + + pymc3_random( + pm.SkewNormal, {"mu": R, "sd": Rplus, "alpha": R}, ref_rand=ref_rand + ) def test_half_normal(self): def ref_rand(size, tau): return st.halfnorm.rvs(size=size, loc=0, scale=tau ** -0.5) - pymc3_random(pm.HalfNormal, {'tau': Rplus}, ref_rand=ref_rand) + + pymc3_random(pm.HalfNormal, {"tau": Rplus}, ref_rand=ref_rand) def test_wald(self): # Cannot do anything too exciting as scipy wald is a # location-scale model of the *standard* wald with mu=1 and lam=1 def ref_rand(size, mu, lam, alpha): return st.wald.rvs(size=size, loc=alpha) - pymc3_random(pm.Wald, - {'mu': Domain([1., 1., 1.]), 'lam': Domain( - [1., 1., 1.]), 'alpha': Rplus}, - ref_rand=ref_rand) + + pymc3_random( + pm.Wald, + { + "mu": Domain([1.0, 1.0, 1.0]), + "lam": Domain([1.0, 1.0, 1.0]), + "alpha": Rplus, + }, + ref_rand=ref_rand, + ) def test_beta(self): def ref_rand(size, alpha, beta): return st.beta.rvs(a=alpha, b=beta, size=size) - pymc3_random(pm.Beta, {'alpha': Rplus, 'beta': Rplus}, ref_rand=ref_rand) + + pymc3_random(pm.Beta, {"alpha": Rplus, "beta": Rplus}, ref_rand=ref_rand) def test_exponential(self): def ref_rand(size, lam): - return nr.exponential(scale=1. / lam, size=size) - pymc3_random(pm.Exponential, {'lam': Rplus}, ref_rand=ref_rand) + return nr.exponential(scale=1.0 / lam, size=size) + + pymc3_random(pm.Exponential, {"lam": Rplus}, ref_rand=ref_rand) def test_laplace(self): def ref_rand(size, mu, b): return st.laplace.rvs(mu, b, size=size) - pymc3_random(pm.Laplace, {'mu': R, 'b': Rplus}, ref_rand=ref_rand) + + pymc3_random(pm.Laplace, {"mu": R, "b": Rplus}, ref_rand=ref_rand) def test_lognormal(self): def ref_rand(size, mu, tau): - return np.exp(mu + (tau ** -0.5) * st.norm.rvs(loc=0., scale=1., size=size)) - pymc3_random(pm.Lognormal, {'mu': R, 'tau': Rplusbig}, ref_rand=ref_rand) + return np.exp( + mu + (tau ** -0.5) * st.norm.rvs(loc=0.0, scale=1.0, size=size) + ) + + pymc3_random(pm.Lognormal, {"mu": R, "tau": Rplusbig}, ref_rand=ref_rand) def test_student_t(self): def ref_rand(size, nu, mu, lam): - return st.t.rvs(nu, mu, lam**-.5, size=size) - pymc3_random(pm.StudentT, {'nu': Rplus, 'mu': R, 'lam': Rplus}, ref_rand=ref_rand) + return st.t.rvs(nu, mu, lam ** -0.5, size=size) + + pymc3_random( + pm.StudentT, {"nu": Rplus, "mu": R, "lam": Rplus}, ref_rand=ref_rand + ) def test_cauchy(self): def ref_rand(size, alpha, beta): return st.cauchy.rvs(alpha, beta, size=size) - pymc3_random(pm.Cauchy, {'alpha': R, 'beta': Rplusbig}, ref_rand=ref_rand) + + pymc3_random(pm.Cauchy, {"alpha": R, "beta": Rplusbig}, ref_rand=ref_rand) def test_half_cauchy(self): def ref_rand(size, beta): return st.halfcauchy.rvs(scale=beta, size=size) - pymc3_random(pm.HalfCauchy, {'beta': Rplusbig}, ref_rand=ref_rand) + + pymc3_random(pm.HalfCauchy, {"beta": Rplusbig}, ref_rand=ref_rand) def test_gamma_alpha_beta(self): def ref_rand(size, alpha, beta): - return st.gamma.rvs(alpha, scale=1. / beta, size=size) - pymc3_random(pm.Gamma, {'alpha': Rplusbig, 'beta': Rplusbig}, ref_rand=ref_rand) + return st.gamma.rvs(alpha, scale=1.0 / beta, size=size) + + pymc3_random(pm.Gamma, {"alpha": Rplusbig, "beta": Rplusbig}, ref_rand=ref_rand) def test_gamma_mu_sd(self): def ref_rand(size, mu, sd): - return st.gamma.rvs(mu**2 / sd**2, scale=sd ** 2 / mu, size=size) - pymc3_random(pm.Gamma, {'mu': Rplusbig, 'sd': Rplusbig}, ref_rand=ref_rand) + return st.gamma.rvs(mu ** 2 / sd ** 2, scale=sd ** 2 / mu, size=size) + + pymc3_random(pm.Gamma, {"mu": Rplusbig, "sd": Rplusbig}, ref_rand=ref_rand) def test_inverse_gamma(self): def ref_rand(size, alpha, beta): return st.invgamma.rvs(a=alpha, scale=beta, size=size) - pymc3_random(pm.InverseGamma, {'alpha': Rplus, 'beta': Rplus}, ref_rand=ref_rand) + + pymc3_random( + pm.InverseGamma, {"alpha": Rplus, "beta": Rplus}, ref_rand=ref_rand + ) def test_pareto(self): def ref_rand(size, alpha, m): return st.pareto.rvs(alpha, scale=m, size=size) - pymc3_random(pm.Pareto, {'alpha': Rplusbig, 'm': Rplusbig}, ref_rand=ref_rand) + + pymc3_random(pm.Pareto, {"alpha": Rplusbig, "m": Rplusbig}, ref_rand=ref_rand) def test_ex_gaussian(self): def ref_rand(size, mu, sigma, nu): return nr.normal(mu, sigma, size=size) + nr.exponential(scale=nu, size=size) - pymc3_random(pm.ExGaussian, {'mu': R, 'sigma': Rplus, 'nu': Rplus}, ref_rand=ref_rand) + + pymc3_random( + pm.ExGaussian, {"mu": R, "sigma": Rplus, "nu": Rplus}, ref_rand=ref_rand + ) def test_vonmises(self): def ref_rand(size, mu, kappa): return st.vonmises.rvs(size=size, loc=mu, kappa=kappa) - pymc3_random(pm.VonMises, {'mu': R, 'kappa': Rplus}, ref_rand=ref_rand) + + pymc3_random(pm.VonMises, {"mu": R, "kappa": Rplus}, ref_rand=ref_rand) def test_flat(self): with pm.Model(): - f = pm.Flat('f') + f = pm.Flat("f") with pytest.raises(ValueError): f.random(1) def test_half_flat(self): with pm.Model(): - f = pm.HalfFlat('f') + f = pm.HalfFlat("f") with pytest.raises(ValueError): f.random(1) def test_binomial(self): - pymc3_random_discrete(pm.Binomial, {'n': Nat, 'p': Unit}, ref_rand=st.binom.rvs) + pymc3_random_discrete(pm.Binomial, {"n": Nat, "p": Unit}, ref_rand=st.binom.rvs) def test_beta_binomial(self): - pymc3_random_discrete(pm.BetaBinomial, {'n': Nat, 'alpha': Rplus, 'beta': Rplus}, - ref_rand=self._beta_bin) + pymc3_random_discrete( + pm.BetaBinomial, + {"n": Nat, "alpha": Rplus, "beta": Rplus}, + ref_rand=self._beta_bin, + ) def _beta_bin(self, n, alpha, beta, size=None): return st.binom.rvs(n, st.beta.rvs(a=alpha, b=beta, size=size)) def test_bernoulli(self): - pymc3_random_discrete(pm.Bernoulli, {'p': Unit}, - ref_rand=lambda size, p=None: st.bernoulli.rvs(p, size=size)) + pymc3_random_discrete( + pm.Bernoulli, + {"p": Unit}, + ref_rand=lambda size, p=None: st.bernoulli.rvs(p, size=size), + ) def test_poisson(self): - pymc3_random_discrete(pm.Poisson, {'mu': Rplusbig}, size=500, ref_rand=st.poisson.rvs) + pymc3_random_discrete( + pm.Poisson, {"mu": Rplusbig}, size=500, ref_rand=st.poisson.rvs + ) def test_negative_binomial(self): def ref_rand(size, alpha, mu): return st.nbinom.rvs(alpha, alpha / (mu + alpha), size=size) - pymc3_random_discrete(pm.NegativeBinomial, {'mu': Rplusbig, 'alpha': Rplusbig}, - size=100, fails=50, ref_rand=ref_rand) + + pymc3_random_discrete( + pm.NegativeBinomial, + {"mu": Rplusbig, "alpha": Rplusbig}, + size=100, + fails=50, + ref_rand=ref_rand, + ) def test_geometric(self): - pymc3_random_discrete(pm.Geometric, {'p': Unit}, size=500, fails=50, ref_rand=nr.geometric) + pymc3_random_discrete( + pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric + ) def test_discrete_uniform(self): def ref_rand(size, lower, upper): return st.randint.rvs(lower, upper + 1, size=size) - pymc3_random_discrete(pm.DiscreteUniform, {'lower': -NatSmall, 'upper': NatSmall}, - ref_rand=ref_rand) + + pymc3_random_discrete( + pm.DiscreteUniform, + {"lower": -NatSmall, "upper": NatSmall}, + ref_rand=ref_rand, + ) def test_discrete_weibull(self): def ref_rand(size, q, beta): u = np.random.uniform(size=size) - return np.ceil(np.power(np.log(1 - u) / np.log(q), 1. / beta)) - 1 + return np.ceil(np.power(np.log(1 - u) / np.log(q), 1.0 / beta)) - 1 - pymc3_random_discrete(pm.DiscreteWeibull, {'q': Unit, 'beta': Rplusdunif}, - ref_rand=ref_rand) + pymc3_random_discrete( + pm.DiscreteWeibull, {"q": Unit, "beta": Rplusdunif}, ref_rand=ref_rand + ) - @pytest.mark.parametrize('s', [2, 3, 4]) + @pytest.mark.parametrize("s", [2, 3, 4]) def test_categorical_random(self, s): def ref_rand(size, p): return nr.choice(np.arange(p.shape[0]), p=p, size=size) - pymc3_random_discrete(pm.Categorical, {'p': Simplex(s)}, ref_rand=ref_rand) + + pymc3_random_discrete(pm.Categorical, {"p": Simplex(s)}, ref_rand=ref_rand) def test_constant_dist(self): def ref_rand(size, c): return c * np.ones(size, dtype=int) - pymc3_random_discrete(pm.Constant, {'c': I}, ref_rand=ref_rand) + + pymc3_random_discrete(pm.Constant, {"c": I}, ref_rand=ref_rand) def test_mv_normal(self): def ref_rand(size, mu, cov): @@ -596,41 +701,82 @@ def ref_rand_uchol(size, mu, chol): return ref_rand(size, mu, np.dot(chol.T, chol)) for n in [2, 3]: - pymc3_random(pm.MvNormal, {'mu': Vector(R, n), 'cov': PdMatrix(n)}, - size=100, valuedomain=Vector(R, n), ref_rand=ref_rand) - pymc3_random(pm.MvNormal, {'mu': Vector(R, n), 'tau': PdMatrix(n)}, - size=100, valuedomain=Vector(R, n), ref_rand=ref_rand_tau) - pymc3_random(pm.MvNormal, {'mu': Vector(R, n), 'chol': PdMatrixChol(n)}, - size=100, valuedomain=Vector(R, n), ref_rand=ref_rand_chol) pymc3_random( pm.MvNormal, - {'mu': Vector(R, n), 'chol': PdMatrixCholUpper(n)}, - size=100, valuedomain=Vector(R, n), ref_rand=ref_rand_uchol, - extra_args={'lower': False} + {"mu": Vector(R, n), "cov": PdMatrix(n)}, + size=100, + valuedomain=Vector(R, n), + ref_rand=ref_rand, + ) + pymc3_random( + pm.MvNormal, + {"mu": Vector(R, n), "tau": PdMatrix(n)}, + size=100, + valuedomain=Vector(R, n), + ref_rand=ref_rand_tau, + ) + pymc3_random( + pm.MvNormal, + {"mu": Vector(R, n), "chol": PdMatrixChol(n)}, + size=100, + valuedomain=Vector(R, n), + ref_rand=ref_rand_chol, + ) + pymc3_random( + pm.MvNormal, + {"mu": Vector(R, n), "chol": PdMatrixCholUpper(n)}, + size=100, + valuedomain=Vector(R, n), + ref_rand=ref_rand_uchol, + extra_args={"lower": False}, ) def test_matrix_normal(self): def ref_rand(size, mu, rowcov, colcov): - return st.matrix_normal.rvs(mean=mu, rowcov=rowcov, colcov=colcov, size=size) + return st.matrix_normal.rvs( + mean=mu, rowcov=rowcov, colcov=colcov, size=size + ) # def ref_rand_tau(size, mu, tau): # return ref_rand(size, mu, linalg.inv(tau)) def ref_rand_chol(size, mu, rowchol, colchol): - return ref_rand(size, mu, rowcov=np.dot(rowchol, rowchol.T), - colcov=np.dot(colchol, colchol.T)) + return ref_rand( + size, + mu, + rowcov=np.dot(rowchol, rowchol.T), + colcov=np.dot(colchol, colchol.T), + ) def ref_rand_uchol(size, mu, rowchol, colchol): - return ref_rand(size, mu, rowcov=np.dot(rowchol.T, rowchol), - colcov=np.dot(colchol.T, colchol)) + return ref_rand( + size, + mu, + rowcov=np.dot(rowchol.T, rowchol), + colcov=np.dot(colchol.T, colchol), + ) for n in [2, 3]: - pymc3_random(pm.MatrixNormal, {'mu': RealMatrix(n, n), 'rowcov': PdMatrix(n), 'colcov': PdMatrix(n)}, - size=n, valuedomain=RealMatrix(n, n), ref_rand=ref_rand) + pymc3_random( + pm.MatrixNormal, + {"mu": RealMatrix(n, n), "rowcov": PdMatrix(n), "colcov": PdMatrix(n)}, + size=n, + valuedomain=RealMatrix(n, n), + ref_rand=ref_rand, + ) # pymc3_random(pm.MatrixNormal, {'mu': RealMatrix(n, n), 'tau': PdMatrix(n)}, # size=n, valuedomain=RealMatrix(n, n), ref_rand=ref_rand_tau) - pymc3_random(pm.MatrixNormal, {'mu': RealMatrix(n, n), 'rowchol': PdMatrixChol(n), 'colchol': PdMatrixChol(n)}, - size=n, valuedomain=RealMatrix(n, n), ref_rand=ref_rand_chol) + pymc3_random( + pm.MatrixNormal, + { + "mu": RealMatrix(n, n), + "rowchol": PdMatrixChol(n), + "colchol": PdMatrixChol(n), + }, + size=n, + valuedomain=RealMatrix(n, n), + ref_rand=ref_rand_chol, + ) # pymc3_random( # pm.MvNormal, # {'mu': RealMatrix(n, n), 'rowchol': PdMatrixCholUpper(n), 'colchol': PdMatrixCholUpper(n)}, @@ -641,7 +787,7 @@ def ref_rand_uchol(size, mu, rowchol, colchol): def test_kronecker_normal(self): def ref_rand(size, mu, covs, sigma): cov = pm.math.kronecker(covs[0], covs[1]).eval() - cov += sigma**2 * np.identity(cov.shape[0]) + cov += sigma ** 2 * np.identity(cov.shape[0]) return st.multivariate_normal.rvs(mean=mu, cov=cov, size=size) def ref_rand_chol(size, mu, chols, sigma): @@ -657,98 +803,135 @@ def ref_rand_evd(size, mu, evds, sigma): sizes = [2, 3] sigmas = [0, 1] for n, sigma in zip(sizes, sigmas): - N = n**2 + N = n ** 2 covs = [RandomPdMatrix(n), RandomPdMatrix(n)] chols = list(map(np.linalg.cholesky, covs)) evds = list(map(np.linalg.eigh, covs)) - dom = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) - mu = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + dom = Domain([np.random.randn(N) * 0.1], edges=(None, None), shape=N) + mu = Domain([np.random.randn(N) * 0.1], edges=(None, None), shape=N) - std_args = {'mu': mu} - cov_args = {'covs': covs} - chol_args = {'chols': chols} - evd_args = {'evds': evds} + std_args = {"mu": mu} + cov_args = {"covs": covs} + chol_args = {"chols": chols} + evd_args = {"evds": evds} if sigma is not None and sigma != 0: - std_args['sigma'] = Domain([sigma], edges=(None, None)) + std_args["sigma"] = Domain([sigma], edges=(None, None)) else: for args in [cov_args, chol_args, evd_args]: - args['sigma'] = sigma + args["sigma"] = sigma pymc3_random( - pm.KroneckerNormal, std_args, valuedomain=dom, - ref_rand=ref_rand, extra_args=cov_args, model_args=cov_args) + pm.KroneckerNormal, + std_args, + valuedomain=dom, + ref_rand=ref_rand, + extra_args=cov_args, + model_args=cov_args, + ) pymc3_random( - pm.KroneckerNormal, std_args, valuedomain=dom, - ref_rand=ref_rand_chol, extra_args=chol_args, - model_args=chol_args) + pm.KroneckerNormal, + std_args, + valuedomain=dom, + ref_rand=ref_rand_chol, + extra_args=chol_args, + model_args=chol_args, + ) pymc3_random( - pm.KroneckerNormal, std_args, valuedomain=dom, - ref_rand=ref_rand_evd, extra_args=evd_args, - model_args=evd_args) + pm.KroneckerNormal, + std_args, + valuedomain=dom, + ref_rand=ref_rand_evd, + extra_args=evd_args, + model_args=evd_args, + ) def test_mv_t(self): def ref_rand(size, nu, Sigma, mu): normal = st.multivariate_normal.rvs(cov=Sigma, size=size).T chi2 = st.chi2.rvs(df=nu, size=size) return mu + np.sqrt(nu) * (normal / chi2).T + for n in [2, 3]: - pymc3_random(pm.MvStudentT, - {'nu': Domain([5, 10, 25, 50]), 'Sigma': PdMatrix( - n), 'mu': Vector(R, n)}, - size=100, valuedomain=Vector(R, n), ref_rand=ref_rand) + pymc3_random( + pm.MvStudentT, + { + "nu": Domain([5, 10, 25, 50]), + "Sigma": PdMatrix(n), + "mu": Vector(R, n), + }, + size=100, + valuedomain=Vector(R, n), + ref_rand=ref_rand, + ) def test_dirichlet(self): def ref_rand(size, a): return st.dirichlet.rvs(a, size=size) + for n in [2, 3]: - pymc3_random(pm.Dirichlet, {'a': Vector(Rplus, n)}, - valuedomain=Simplex(n), size=100, ref_rand=ref_rand) + pymc3_random( + pm.Dirichlet, + {"a": Vector(Rplus, n)}, + valuedomain=Simplex(n), + size=100, + ref_rand=ref_rand, + ) def test_multinomial(self): def ref_rand(size, p, n): return nr.multinomial(pvals=p, n=n, size=size) + for n in [2, 3]: - pymc3_random_discrete(pm.Multinomial, {'p': Simplex(n), 'n': Nat}, - valuedomain=Vector(Nat, n), size=100, ref_rand=ref_rand) + pymc3_random_discrete( + pm.Multinomial, + {"p": Simplex(n), "n": Nat}, + valuedomain=Vector(Nat, n), + size=100, + ref_rand=ref_rand, + ) def test_gumbel(self): def ref_rand(size, mu, beta): return st.gumbel_r.rvs(loc=mu, scale=beta, size=size) - pymc3_random(pm.Gumbel, {'mu': R, 'beta': Rplus}, ref_rand=ref_rand) + + pymc3_random(pm.Gumbel, {"mu": R, "beta": Rplus}, ref_rand=ref_rand) def test_logistic(self): def ref_rand(size, mu, s): return st.logistic.rvs(loc=mu, scale=s, size=size) - pymc3_random(pm.Logistic, {'mu': R, 's': Rplus}, ref_rand=ref_rand) + + pymc3_random(pm.Logistic, {"mu": R, "s": Rplus}, ref_rand=ref_rand) def test_logitnormal(self): def ref_rand(size, mu, sd): return expit(st.norm.rvs(loc=mu, scale=sd, size=size)) - pymc3_random(pm.LogitNormal, {'mu': R, 'sd': Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + pymc3_random(pm.LogitNormal, {"mu": R, "sd": Rplus}, ref_rand=ref_rand) + + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_interpolated(self): for mu in R.vals: for sd in Rplus.vals: - #pylint: disable=cell-var-from-loop + # pylint: disable=cell-var-from-loop def ref_rand(size): return st.norm.rvs(loc=mu, scale=sd, size=size) - class TestedInterpolated (pm.Interpolated): - + class TestedInterpolated(pm.Interpolated): def __init__(self, **kwargs): x_points = np.linspace(mu - 5 * sd, mu + 5 * sd, 100) pdf_points = st.norm.pdf(x_points, loc=mu, scale=sd) super(TestedInterpolated, self).__init__( - x_points=x_points, - pdf_points=pdf_points, - **kwargs + x_points=x_points, pdf_points=pdf_points, **kwargs ) pymc3_random(TestedInterpolated, {}, ref_rand=ref_rand) - @pytest.mark.skip('Wishart random sampling not implemented.\n' - 'See https://github.com/pymc-devs/pymc3/issues/538') + @pytest.mark.skip( + "Wishart random sampling not implemented.\n" + "See https://github.com/pymc-devs/pymc3/issues/538" + ) def test_wishart(self): # Wishart non current recommended for use: # https://github.com/pymc-devs/pymc3/issues/538 @@ -762,88 +945,77 @@ def test_wishart(self): def test_lkj(self): for n in [2, 10, 50]: - #pylint: disable=cell-var-from-loop - shape = n*(n-1)//2 + # pylint: disable=cell-var-from-loop + shape = n * (n - 1) // 2 def ref_rand(size, eta): - beta = eta - 1 + n/2 - return (st.beta.rvs(size=(size, shape), a=beta, b=beta)-.5)*2 - - class TestedLKJCorr (pm.LKJCorr): + beta = eta - 1 + n / 2 + return (st.beta.rvs(size=(size, shape), a=beta, b=beta) - 0.5) * 2 + class TestedLKJCorr(pm.LKJCorr): def __init__(self, **kwargs): - kwargs.pop('shape', None) - super(TestedLKJCorr, self).__init__( - n=n, - **kwargs - ) + kwargs.pop("shape", None) + super(TestedLKJCorr, self).__init__(n=n, **kwargs) - pymc3_random(TestedLKJCorr, - {'eta': Domain([1., 10., 100.])}, - size=10000//n, - ref_rand=ref_rand) + pymc3_random( + TestedLKJCorr, + {"eta": Domain([1.0, 10.0, 100.0])}, + size=10000 // n, + ref_rand=ref_rand, + ) def test_normalmixture(self): def ref_rand(size, w, mu, sd): component = np.random.choice(w.size, size=size, p=w) return np.random.normal(mu[component], sd[component], size=size) - pymc3_random(pm.NormalMixture, {'w': Simplex(2), - 'mu': Domain([[.05, 2.5], [-5., 1.]], edges=(None, None)), - 'sd': Domain([[1, 1], [1.5, 2.]], edges=(None, None))}, - extra_args={'comp_shape': 2}, - size=1000, - ref_rand=ref_rand) - pymc3_random(pm.NormalMixture, {'w': Simplex(3), - 'mu': Domain([[-5., 1., 2.5]], edges=(None, None)), - 'sd': Domain([[1.5, 2., 3.]], edges=(None, None))}, - extra_args={'comp_shape': 3}, - size=1000, - ref_rand=ref_rand) + pymc3_random( + pm.NormalMixture, + { + "w": Simplex(2), + "mu": Domain([[0.05, 2.5], [-5.0, 1.0]], edges=(None, None)), + "sd": Domain([[1, 1], [1.5, 2.0]], edges=(None, None)), + }, + extra_args={"comp_shape": 2}, + size=1000, + ref_rand=ref_rand, + ) + pymc3_random( + pm.NormalMixture, + { + "w": Simplex(3), + "mu": Domain([[-5.0, 1.0, 2.5]], edges=(None, None)), + "sd": Domain([[1.5, 2.0, 3.0]], edges=(None, None)), + }, + extra_args={"comp_shape": 3}, + size=1000, + ref_rand=ref_rand, + ) def test_mixture_random_shape(): # test the shape broadcasting in mixture random - y = np.concatenate([nr.poisson(5, size=10), - nr.poisson(9, size=10)]) + y = np.concatenate([nr.poisson(5, size=10), nr.poisson(9, size=10)]) with pm.Model() as m: comp0 = pm.Poisson.dist(mu=np.ones(2)) - w0 = pm.Dirichlet('w0', a=np.ones(2)) - like0 = pm.Mixture('like0', - w=w0, - comp_dists=comp0, - observed=y) - - comp1 = pm.Poisson.dist(mu=np.ones((20, 2)), - shape=(20, 2)) - w1 = pm.Dirichlet('w1', a=np.ones(2)) - like1 = pm.Mixture('like1', - w=w1, - comp_dists=comp1, - observed=y) + w0 = pm.Dirichlet("w0", a=np.ones(2)) + like0 = pm.Mixture("like0", w=w0, comp_dists=comp0, observed=y) + + comp1 = pm.Poisson.dist(mu=np.ones((20, 2)), shape=(20, 2)) + w1 = pm.Dirichlet("w1", a=np.ones(2)) + like1 = pm.Mixture("like1", w=w1, comp_dists=comp1, observed=y) comp2 = pm.Poisson.dist(mu=np.ones(2)) - w2 = pm.Dirichlet('w2', - a=np.ones(2), - shape=(20, 2)) - like2 = pm.Mixture('like2', - w=w2, - comp_dists=comp2, - observed=y) - - comp3 = pm.Poisson.dist(mu=np.ones(2), - shape=(20, 2)) - w3 = pm.Dirichlet('w3', - a=np.ones(2), - shape=(20, 2)) - like3 = pm.Mixture('like3', - w=w3, - comp_dists=comp3, - observed=y) - - rand0, rand1, rand2, rand3 = draw_values([like0, like1, like2, like3], - point=m.test_point, - size=100) + w2 = pm.Dirichlet("w2", a=np.ones(2), shape=(20, 2)) + like2 = pm.Mixture("like2", w=w2, comp_dists=comp2, observed=y) + + comp3 = pm.Poisson.dist(mu=np.ones(2), shape=(20, 2)) + w3 = pm.Dirichlet("w3", a=np.ones(2), shape=(20, 2)) + like3 = pm.Mixture("like3", w=w3, comp_dists=comp3, observed=y) + + rand0, rand1, rand2, rand3 = draw_values( + [like0, like1, like2, like3], point=m.test_point, size=100 + ) assert rand0.shape == (100, 20) assert rand1.shape == (100, 20) assert rand2.shape == (100, 20) @@ -851,29 +1023,34 @@ def test_mixture_random_shape(): with m: ppc = pm.sample_posterior_predictive([m.test_point], samples=200) - assert ppc['like0'].shape == (200, 20) - assert ppc['like1'].shape == (200, 20) - assert ppc['like2'].shape == (200, 20) - assert ppc['like3'].shape == (200, 20) + assert ppc["like0"].shape == (200, 20) + assert ppc["like1"].shape == (200, 20) + assert ppc["like2"].shape == (200, 20) + assert ppc["like3"].shape == (200, 20) def test_density_dist_with_random_sampleable(): with pm.Model() as model: - mu = pm.Normal('mu', 0, 1) + mu = pm.Normal("mu", 0, 1) normal_dist = pm.Normal.dist(mu, 1) - pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100), random=normal_dist.random) + pm.DensityDist( + "density_dist", + normal_dist.logp, + observed=np.random.randn(100), + random=normal_dist.random, + ) trace = pm.sample(100) samples = 500 ppc = pm.sample_posterior_predictive(trace, samples=samples, model=model, size=100) - assert len(ppc['density_dist']) == samples + assert len(ppc["density_dist"]) == samples def test_density_dist_without_random_not_sampleable(): with pm.Model() as model: - mu = pm.Normal('mu', 0, 1) + mu = pm.Normal("mu", 0, 1) normal_dist = pm.Normal.dist(mu, 1) - pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100)) + pm.DensityDist("density_dist", normal_dist.logp, observed=np.random.randn(100)) trace = pm.sample(100) samples = 500 diff --git a/pymc3/tests/test_distributions_timeseries.py b/pymc3/tests/test_distributions_timeseries.py index fe083ddff4..e35a6ccc53 100644 --- a/pymc3/tests/test_distributions_timeseries.py +++ b/pymc3/tests/test_distributions_timeseries.py @@ -8,40 +8,42 @@ import numpy as np + def test_AR(): # AR1 - data = np.array([0.3,1,2,3,4]) + data = np.array([0.3, 1, 2, 3, 4]) phi = np.array([0.99]) with Model() as t: - y = AR('y', phi, sd=1, shape=len(data)) - z = Normal('z', mu=phi*data[:-1], sd=1, shape=len(data)-1) - ar_like = t['y'].logp({'z':data[1:], 'y': data}) - reg_like = t['z'].logp({'z':data[1:], 'y': data}) + y = AR("y", phi, sd=1, shape=len(data)) + z = Normal("z", mu=phi * data[:-1], sd=1, shape=len(data) - 1) + ar_like = t["y"].logp({"z": data[1:], "y": data}) + reg_like = t["z"].logp({"z": data[1:], "y": data}) np.testing.assert_allclose(ar_like, reg_like) # AR1 and AR(1) with Model() as t: - rho = Normal('rho', 0., 1.) - y1 = AR1('y1', rho, 1., observed=data) - y2 = AR('y2', rho, 1., init=Normal.dist(0, 1), observed=data) - np.testing.assert_allclose(y1.logp(t.test_point), - y2.logp(t.test_point)) + rho = Normal("rho", 0.0, 1.0) + y1 = AR1("y1", rho, 1.0, observed=data) + y2 = AR("y2", rho, 1.0, init=Normal.dist(0, 1), observed=data) + np.testing.assert_allclose(y1.logp(t.test_point), y2.logp(t.test_point)) # AR1 + constant with Model() as t: - y = AR('y', [0.3, phi], sd=1, shape=len(data), constant=True) - z = Normal('z', mu=0.3 + phi*data[:-1], sd=1, shape=len(data)-1) - ar_like = t['y'].logp({'z':data[1:], 'y': data}) - reg_like = t['z'].logp({'z':data[1:], 'y': data}) + y = AR("y", [0.3, phi], sd=1, shape=len(data), constant=True) + z = Normal("z", mu=0.3 + phi * data[:-1], sd=1, shape=len(data) - 1) + ar_like = t["y"].logp({"z": data[1:], "y": data}) + reg_like = t["z"].logp({"z": data[1:], "y": data}) np.testing.assert_allclose(ar_like, reg_like) # AR2 phi = np.array([0.84, 0.10]) with Model() as t: - y = AR('y', phi, sd=1, shape=len(data)) - z = Normal('z', mu=phi[0]*data[1:-1]+phi[1]*data[:-2], sd=1, shape=len(data)-2) - ar_like = t['y'].logp({'z':data[2:], 'y': data}) - reg_like = t['z'].logp({'z':data[2:], 'y': data}) + y = AR("y", phi, sd=1, shape=len(data)) + z = Normal( + "z", mu=phi[0] * data[1:-1] + phi[1] * data[:-2], sd=1, shape=len(data) - 2 + ) + ar_like = t["y"].logp({"z": data[2:], "y": data}) + reg_like = t["z"].logp({"z": data[2:], "y": data}) np.testing.assert_allclose(ar_like, reg_like) @@ -51,57 +53,73 @@ def test_AR_nd(): beta_tp = np.random.randn(p, n) y_tp = np.random.randn(T, n) with Model() as t0: - beta = Normal('beta', 0., 1., - shape=(p, n), - testval=beta_tp) - AR('y', beta, sd=1.0, - shape=(T, n), testval=y_tp) + beta = Normal("beta", 0.0, 1.0, shape=(p, n), testval=beta_tp) + AR("y", beta, sd=1.0, shape=(T, n), testval=y_tp) with Model() as t1: - beta = Normal('beta', 0., 1., - shape=(p, n), - testval=beta_tp) + beta = Normal("beta", 0.0, 1.0, shape=(p, n), testval=beta_tp) for i in range(n): - AR('y_%d' % i, beta[:, i], sd=1.0, - shape=T, testval=y_tp[:, i]) + AR("y_%d" % i, beta[:, i], sd=1.0, shape=T, testval=y_tp[:, i]) - np.testing.assert_allclose(t0.logp(t0.test_point), - t1.logp(t1.test_point)) + np.testing.assert_allclose(t0.logp(t0.test_point), t1.logp(t1.test_point)) def test_GARCH11(): # test data ~ N(0, 1) - data = np.array([-1.35078362, -0.81254164, 0.28918551, -2.87043544, -0.94353337, - 0.83660719, -0.23336562, -0.58586298, -1.36856736, -1.60832975, - -1.31403141, 0.05446936, -0.97213128, -0.18928725, 1.62011258, - -0.95978616, -2.06536047, 0.6556103 , -0.27816645, -1.26413397]) + data = np.array( + [ + -1.35078362, + -0.81254164, + 0.28918551, + -2.87043544, + -0.94353337, + 0.83660719, + -0.23336562, + -0.58586298, + -1.36856736, + -1.60832975, + -1.31403141, + 0.05446936, + -0.97213128, + -0.18928725, + 1.62011258, + -0.95978616, + -2.06536047, + 0.6556103, + -0.27816645, + -1.26413397, + ] + ) omega = 0.6 alpha_1 = 0.4 beta_1 = 0.5 initial_vol = np.float64(0.9) vol = np.empty_like(data) vol[0] = initial_vol - for i in range(len(data)-1): - vol[i+1] = np.sqrt(omega + beta_1*vol[i]**2 + alpha_1*data[i]**2) + for i in range(len(data) - 1): + vol[i + 1] = np.sqrt(omega + beta_1 * vol[i] ** 2 + alpha_1 * data[i] ** 2) with Model() as t: - y = GARCH11('y', omega=omega, alpha_1=alpha_1, beta_1=beta_1, - initial_vol=initial_vol, shape=data.shape) - z = Normal('z', mu=0, sd=vol, shape=data.shape) - garch_like = t['y'].logp({'z':data, 'y': data}) - reg_like = t['z'].logp({'z':data, 'y': data}) + y = GARCH11( + "y", + omega=omega, + alpha_1=alpha_1, + beta_1=beta_1, + initial_vol=initial_vol, + shape=data.shape, + ) + z = Normal("z", mu=0, sd=vol, shape=data.shape) + garch_like = t["y"].logp({"z": data, "y": data}) + reg_like = t["z"].logp({"z": data, "y": data}) np.testing.assert_allclose(garch_like, reg_like) - def _gen_sde_path(sde, pars, dt, n, x0): xs = [x0] wt = np.random.normal(size=(n,) if isinstance(x0, float) else (n, x0.size)) for i in range(n): f, g = sde(xs[-1], *pars) - xs.append( - xs[-1] + f * dt + np.sqrt(dt) * g * wt[i] - ) + xs.append(xs[-1] + f * dt + np.sqrt(dt) * g * wt[i]) return np.array(xs) @@ -115,17 +133,17 @@ def test_linear(): z = x + np.random.randn(x.size) * sig2 # build model with Model() as model: - lamh = Flat('lamh') - xh = EulerMaruyama('xh', dt, sde, (lamh,), shape=N + 1, testval=x) - Normal('zh', mu=xh, sd=sig2, observed=z) + lamh = Flat("lamh") + xh = EulerMaruyama("xh", dt, sde, (lamh,), shape=N + 1, testval=x) + Normal("zh", mu=xh, sd=sig2, observed=z) # invert with model: - trace = sample(init='advi+adapt_diag', chains=1) + trace = sample(init="advi+adapt_diag", chains=1) ppc = sample_posterior_predictive(trace, model=model) # test p95 = [2.5, 97.5] lo, hi = np.percentile(trace[lamh], p95, axis=0) assert (lo < lam) and (lam < hi) - lo, hi = np.percentile(ppc['zh'], p95, axis=0) + lo, hi = np.percentile(ppc["zh"], p95, axis=0) assert ((lo < z) * (z < hi)).mean() > 0.95 diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py index 4a0f0d3b91..9d0c757186 100644 --- a/pymc3/tests/test_examples.py +++ b/pymc3/tests/test_examples.py @@ -9,43 +9,46 @@ from .helpers import SeededTest -matplotlib.use('Agg', warn=False) +matplotlib.use("Agg", warn=False) def get_city_data(): """Helper to get city data""" - data = pd.read_csv(pm.get_data('srrs2.dat')) - cty_data = pd.read_csv(pm.get_data('cty.dat')) + data = pd.read_csv(pm.get_data("srrs2.dat")) + cty_data = pd.read_csv(pm.get_data("cty.dat")) - data = data[data.state == 'MN'] + data = data[data.state == "MN"] - data['fips'] = data.stfips * 1000 + data.cntyfips - cty_data['fips'] = cty_data.stfips * 1000 + cty_data.ctfips - data['lradon'] = np.log(np.where(data.activity == 0, .1, data.activity)) - data = data.merge(cty_data, 'inner', on='fips') + data["fips"] = data.stfips * 1000 + data.cntyfips + cty_data["fips"] = cty_data.stfips * 1000 + cty_data.ctfips + data["lradon"] = np.log(np.where(data.activity == 0, 0.1, data.activity)) + data = data.merge(cty_data, "inner", on="fips") - unique = data[['fips']].drop_duplicates() - unique['group'] = np.arange(len(unique)) - unique.set_index('fips') - return data.merge(unique, 'inner', on='fips') + unique = data[["fips"]].drop_duplicates() + unique["group"] = np.arange(len(unique)) + unique.set_index("fips") + return data.merge(unique, "inner", on="fips") class TestARM5_4(SeededTest): def build_model(self): - data = pd.read_csv(pm.get_data('wells.dat'), - delimiter=u' ', index_col=u'id', - dtype={u'switch': np.int8}) + data = pd.read_csv( + pm.get_data("wells.dat"), + delimiter=u" ", + index_col=u"id", + dtype={u"switch": np.int8}, + ) data.dist /= 100 data.educ /= 4 col = data.columns P = data[col[1:]] P -= P.mean() - P['1'] = 1 + P["1"] = 1 with pm.Model() as model: - effects = pm.Normal('effects', mu=0, sd=100, shape=len(P.columns)) + effects = pm.Normal("effects", mu=0, sd=100, shape=len(P.columns)) logit_p = tt.dot(floatX(np.array(P)), effects) - pm.Bernoulli('s', logit_p=logit_p, observed=floatX(data.switch.values)) + pm.Bernoulli("s", logit_p=logit_p, observed=floatX(data.switch.values)) return model def test_run(self): @@ -58,32 +61,37 @@ class TestARM12_6(SeededTest): def build_model(self): data = get_city_data() - self.obs_means = data.groupby('fips').lradon.mean().as_matrix() + self.obs_means = data.groupby("fips").lradon.mean().as_matrix() lradon = data.lradon.as_matrix() floor = data.floor.as_matrix() group = data.group.as_matrix() with pm.Model() as model: - groupmean = pm.Normal('groupmean', 0, 10. ** -2.) - groupsd = pm.Uniform('groupsd', 0, 10.) - sd = pm.Uniform('sd', 0, 10.) - floor_m = pm.Normal('floor_m', 0, 5. ** -2.) - means = pm.Normal('means', groupmean, groupsd ** -2., shape=len(self.obs_means)) - pm.Normal('lr', floor * floor_m + means[group], sd ** -2., observed=lradon) + groupmean = pm.Normal("groupmean", 0, 10.0 ** -2.0) + groupsd = pm.Uniform("groupsd", 0, 10.0) + sd = pm.Uniform("sd", 0, 10.0) + floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0) + means = pm.Normal( + "means", groupmean, groupsd ** -2.0, shape=len(self.obs_means) + ) + pm.Normal("lr", floor * floor_m + means[group], sd ** -2.0, observed=lradon) return model def too_slow(self): model = self.build_model() - start = {'groupmean': self.obs_means.mean(), - 'groupsd_interval__': 0, - 'sd_interval__': 0, - 'means': self.obs_means, - 'floor_m': 0., - } + start = { + "groupmean": self.obs_means.mean(), + "groupsd_interval__": 0, + "sd_interval__": 0, + "means": self.obs_means, + "floor_m": 0.0, + } with model: - start = pm.find_MAP(start=start, - vars=[model['groupmean'], model['sd_interval__'], model['floor_m']]) + start = pm.find_MAP( + start=start, + vars=[model["groupmean"], model["sd_interval__"], model["floor_m"]], + ) step = pm.NUTS(model.vars, scaling=start) pm.sample(50, step=step, start=start) @@ -91,7 +99,7 @@ def too_slow(self): class TestARM12_6Uranium(SeededTest): def build_model(self): data = get_city_data() - self.obs_means = data.groupby('fips').lradon.mean() + self.obs_means = data.groupby("fips").lradon.mean() lradon = data.lradon.as_matrix() floor = data.floor.as_matrix() @@ -99,27 +107,35 @@ def build_model(self): ufull = data.Uppm.as_matrix() with pm.Model() as model: - groupmean = pm.Normal('groupmean', 0, 10. ** -2.) - groupsd = pm.Uniform('groupsd', 0, 10.) - sd = pm.Uniform('sd', 0, 10.) - floor_m = pm.Normal('floor_m', 0, 5. ** -2.) - u_m = pm.Normal('u_m', 0, 5. ** -2) - means = pm.Normal('means', groupmean, groupsd ** -2., shape=len(self.obs_means)) - pm.Normal('lr', floor * floor_m + means[group] + ufull * u_m, sd ** - 2., - observed=lradon) + groupmean = pm.Normal("groupmean", 0, 10.0 ** -2.0) + groupsd = pm.Uniform("groupsd", 0, 10.0) + sd = pm.Uniform("sd", 0, 10.0) + floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0) + u_m = pm.Normal("u_m", 0, 5.0 ** -2) + means = pm.Normal( + "means", groupmean, groupsd ** -2.0, shape=len(self.obs_means) + ) + pm.Normal( + "lr", + floor * floor_m + means[group] + ufull * u_m, + sd ** -2.0, + observed=lradon, + ) return model def too_slow(self): model = self.build_model() with model: - start = pm.Point({ - 'groupmean': self.obs_means.mean(), - 'groupsd_interval__': 0, - 'sd_interval__': 0, - 'means': np.array(self.obs_means), - 'u_m': np.array([.72]), - 'floor_m': 0., - }) + start = pm.Point( + { + "groupmean": self.obs_means.mean(), + "groupsd_interval__": 0, + "sd_interval__": 0, + "means": np.array(self.obs_means), + "u_m": np.array([0.72]), + "floor_m": 0.0, + } + ) start = pm.find_MAP(start, model.vars[:-1]) H = model.fastd2logp() @@ -130,13 +146,121 @@ def too_slow(self): def build_disaster_model(masked=False): - disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, - 3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5, - 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, - 1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, - 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2, - 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]) + disasters_data = np.array( + [ + 4, + 5, + 4, + 0, + 1, + 4, + 3, + 4, + 0, + 6, + 3, + 3, + 4, + 0, + 2, + 6, + 3, + 3, + 5, + 4, + 5, + 3, + 1, + 4, + 4, + 1, + 5, + 5, + 3, + 4, + 2, + 5, + 2, + 2, + 3, + 4, + 2, + 1, + 3, + 2, + 2, + 1, + 1, + 1, + 1, + 3, + 0, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 3, + 1, + 0, + 3, + 2, + 2, + 0, + 1, + 1, + 1, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 2, + 1, + 0, + 0, + 0, + 1, + 1, + 0, + 2, + 3, + 3, + 1, + 1, + 2, + 1, + 1, + 1, + 1, + 2, + 4, + 2, + 0, + 0, + 1, + 4, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + ] + ) if masked: disasters_data[[23, 68]] = -1 disasters_data = np.ma.masked_values(disasters_data, value=-1) @@ -144,27 +268,29 @@ def build_disaster_model(masked=False): with pm.Model() as model: # Prior for distribution of switchpoint location - switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years) + switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years) # Priors for pre- and post-switch mean number of disasters - early_mean = pm.Exponential('early_mean', lam=1.) - late_mean = pm.Exponential('late_mean', lam=1.) + early_mean = pm.Exponential("early_mean", lam=1.0) + late_mean = pm.Exponential("late_mean", lam=1.0) # Allocate appropriate Poisson rates to years before and after current # switchpoint location idx = np.arange(years) rate = tt.switch(switchpoint >= idx, early_mean, late_mean) # Data likelihood - pm.Poisson('disasters', rate, observed=disasters_data) + pm.Poisson("disasters", rate, observed=disasters_data) return model -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestDisasterModel(SeededTest): # Time series of recorded coal mining disasters in the UK from 1851 to 1962 def test_disaster_model(self): model = build_disaster_model(masked=False) with model: # Initial values for stochastic nodes - start = {'early_mean': 2., 'late_mean': 3.} + start = {"early_mean": 2.0, "late_mean": 3.0} # Use slice sampler for means (other varibles auto-selected) step = pm.Slice([model.early_mean_log__, model.late_mean_log__]) tr = pm.sample(500, tune=50, start=start, step=step, chains=2) @@ -174,7 +300,7 @@ def test_disaster_model_missing(self): model = build_disaster_model(masked=True) with model: # Initial values for stochastic nodes - start = {'early_mean': 2., 'late_mean': 3.} + start = {"early_mean": 2.0, "late_mean": 3.0} # Use slice sampler for means (other varibles auto-selected) step = pm.Slice([model.early_mean_log__, model.late_mean_log__]) tr = pm.sample(500, tune=50, start=start, step=step, chains=2) @@ -187,10 +313,14 @@ def build_model(self): true_intercept = 1 true_slope = 2 self.x = np.linspace(0, 1, size) - self.y = true_intercept + self.x * true_slope + np.random.normal(scale=.5, size=size) + self.y = ( + true_intercept + + self.x * true_slope + + np.random.normal(scale=0.5, size=size) + ) data = dict(x=self.x, y=self.y) with pm.Model() as model: - pm.GLM.from_formula('y ~ x', data) + pm.GLM.from_formula("y ~ x", data) return model def test_run(self): @@ -231,6 +361,7 @@ class TestLatentOccupancy(SeededTest): Created by Chris Fonnesbeck on 2008-07-28. Copyright (c) 2008 University of Otago. All rights reserved. """ + def setup_method(self): super(TestLatentOccupancy, self).setup_method() # Sample size @@ -240,36 +371,41 @@ def setup_method(self): # True occupancy pi = 0.4 # Simulate some data data - self.y = ((np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n)).astype('int16') + self.y = ( + (np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n) + ).astype("int16") def build_model(self): with pm.Model() as model: # Estimated occupancy - psi = pm.Beta('psi', 1, 1) + psi = pm.Beta("psi", 1, 1) # Latent variable for occupancy - pm.Bernoulli('z', psi, shape=self.y.shape) + pm.Bernoulli("z", psi, shape=self.y.shape) # Estimated mean count - theta = pm.Uniform('theta', 0, 100) + theta = pm.Uniform("theta", 0, 100) # Poisson likelihood - pm.ZeroInflatedPoisson('y', theta, psi, observed=self.y) + pm.ZeroInflatedPoisson("y", theta, psi, observed=self.y) return model def test_run(self): model = self.build_model() with model: start = { - 'psi': np.array(0.5, dtype='f'), - 'z': (self.y > 0).astype('int16'), - 'theta': np.array(5, dtype='f'), + "psi": np.array(0.5, dtype="f"), + "z": (self.y > 0).astype("int16"), + "theta": np.array(5, dtype="f"), } step_one = pm.Metropolis([model.theta_interval__, model.psi_logodds__]) step_two = pm.BinaryMetropolis([model.z]) pm.sample(50, step=[step_one, step_two], start=start, chains=1) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to starting inf at starting logP") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to starting inf at starting logP", +) class TestRSV(SeededTest): - ''' + """ This model estimates the population prevalence of respiratory syncytial virus (RSV) among children in Amman, Jordan, based on 3 years of admissions diagnosed with RSV to Al Bashir hospital. @@ -280,7 +416,8 @@ class TestRSV(SeededTest): 1-year-olds) for the proportion of the population in the city, as well as for the market share of the hospital. The latter is based on expert esimate, and hence encoded as a prior. - ''' + """ + def build_model(self): # 1-year-old children in Jordan kids = np.array([180489, 191817, 190830]) @@ -290,15 +427,15 @@ def build_model(self): rsv_cases = np.array([40, 59, 65]) with pm.Model() as model: # Al Bashir hospital market share - market_share = pm.Uniform('market_share', 0.5, 0.6) + market_share = pm.Uniform("market_share", 0.5, 0.6) # Number of 1 y.o. in Amman - n_amman = pm.Binomial('n_amman', kids, amman_prop, shape=3) + n_amman = pm.Binomial("n_amman", kids, amman_prop, shape=3) # Prior probability - prev_rsv = pm.Beta('prev_rsv', 1, 5, shape=3) + prev_rsv = pm.Beta("prev_rsv", 1, 5, shape=3) # RSV in Amman - y_amman = pm.Binomial('y_amman', n_amman, prev_rsv, shape=3, testval=100) + y_amman = pm.Binomial("y_amman", n_amman, prev_rsv, shape=3, testval=100) # Likelihood for number with RSV in hospital (assumes Pr(hosp | RSV) = 1) - pm.Binomial('y_hosp', y_amman, market_share, observed=rsv_cases) + pm.Binomial("y_hosp", y_amman, market_share, observed=rsv_cases) return model def test_run(self): diff --git a/pymc3/tests/test_glm.py b/pymc3/tests/test_glm.py index aa8844141f..979b593a54 100644 --- a/pymc3/tests/test_glm.py +++ b/pymc3/tests/test_glm.py @@ -19,7 +19,7 @@ def setup_class(cls): super(TestGLM, cls).setup_class() cls.intercept = 1 cls.slope = 3 - cls.sd = .05 + cls.sd = 0.05 x_linear, cls.y_linear = generate_data(cls.intercept, cls.slope, size=1000) cls.y_linear += np.random.normal(size=1000, scale=cls.sd) cls.data_linear = pd.DataFrame(dict(x=x_linear, y=cls.y_linear)) @@ -35,85 +35,113 @@ def setup_class(cls): def test_linear_component(self): with Model() as model: - lm = LinearComponent.from_formula('y ~ x', self.data_linear) - sigma = Uniform('sigma', 0, 20) - Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear) + lm = LinearComponent.from_formula("y ~ x", self.data_linear) + sigma = Uniform("sigma", 0, 20) + Normal("y_obs", mu=lm.y_est, sd=sigma, observed=self.y_linear) start = find_MAP(vars=[sigma]) step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, random_seed=self.random_seed) + trace = sample( + 500, + tune=0, + step=step, + start=start, + progressbar=False, + random_seed=self.random_seed, + ) - assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0 + assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["sigma"]) - self.sd), 1) == 0 def test_glm(self): with Model() as model: - GLM.from_formula('y ~ x', self.data_linear) + GLM.from_formula("y ~ x", self.data_linear) step = Slice(model.vars) - trace = sample(500, step=step, tune=0, progressbar=False, - random_seed=self.random_seed) + trace = sample( + 500, step=step, tune=0, progressbar=False, random_seed=self.random_seed + ) - assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['sd'])-self.sd), 1) == 0 + assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["sd"]) - self.sd), 1) == 0 def test_glm_offset(self): - offset = 1. + offset = 1.0 with Model() as model: - GLM.from_formula('y ~ x', self.data_linear, offset=offset) + GLM.from_formula("y ~ x", self.data_linear, offset=offset) step = Slice(model.vars) - trace = sample(500, step=step, tune=0, progressbar=False, - random_seed=self.random_seed) + trace = sample( + 500, step=step, tune=0, progressbar=False, random_seed=self.random_seed + ) - assert round(abs(np.mean(trace['Intercept'])-self.intercept+offset), 1) == 0 + assert ( + round(abs(np.mean(trace["Intercept"]) - self.intercept + offset), 1) + == 0 + ) def test_glm_link_func(self): with Model() as model: - GLM.from_formula('y ~ x', self.data_logistic, - family=families.Binomial(link=families.logit)) + GLM.from_formula( + "y ~ x", + self.data_logistic, + family=families.Binomial(link=families.logit), + ) step = Slice(model.vars) - trace = sample(1000, step=step, tune=0, progressbar=False, - random_seed=self.random_seed) + trace = sample( + 1000, step=step, tune=0, progressbar=False, random_seed=self.random_seed + ) - assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0 + assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0 def test_glm_link_func2(self): with Model() as model: - GLM.from_formula('y ~ x', self.data_logistic2, - family=families.Binomial(priors={'n': self.data_logistic2['n']})) - trace = sample(1000, progressbar=False, - random_seed=self.random_seed) + GLM.from_formula( + "y ~ x", + self.data_logistic2, + family=families.Binomial(priors={"n": self.data_logistic2["n"]}), + ) + trace = sample(1000, progressbar=False, random_seed=self.random_seed) - assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0 + assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0 def test_more_than_one_glm_is_ok(self): with Model(): - GLM.from_formula('y ~ x', self.data_logistic, - family=families.Binomial(link=families.logit), - name='glm1') - GLM.from_formula('y ~ x', self.data_logistic, - family=families.Binomial(link=families.logit), - name='glm2') + GLM.from_formula( + "y ~ x", + self.data_logistic, + family=families.Binomial(link=families.logit), + name="glm1", + ) + GLM.from_formula( + "y ~ x", + self.data_logistic, + family=families.Binomial(link=families.logit), + name="glm2", + ) def test_from_xy(self): with Model(): - GLM(self.data_logistic['x'], - self.data_logistic['y'], + GLM( + self.data_logistic["x"], + self.data_logistic["y"], family=families.Binomial(link=families.logit), - name='glm1') + name="glm1", + ) def test_boolean_y(self): - model = GLM.from_formula('y ~ x', pd.DataFrame( - {'x': self.data_logistic['x'], - 'y': self.data_logistic['y']} - ) + model = GLM.from_formula( + "y ~ x", + pd.DataFrame({"x": self.data_logistic["x"], "y": self.data_logistic["y"]}), ) - model_bool = GLM.from_formula('y ~ x', pd.DataFrame( - {'x': self.data_logistic['x'], - 'y': [bool(i) for i in self.data_logistic['y']]} - ) + model_bool = GLM.from_formula( + "y ~ x", + pd.DataFrame( + { + "x": self.data_logistic["x"], + "y": [bool(i) for i in self.data_logistic["y"]], + } + ), ) assert_equal(model.y.observations, model_bool.y.observations) diff --git a/pymc3/tests/test_gp.py b/pymc3/tests/test_gp.py index 9f52068576..6122d73adc 100644 --- a/pymc3/tests/test_gp.py +++ b/pymc3/tests/test_gp.py @@ -18,8 +18,8 @@ def test_value(self): with pm.Model() as model: zero_mean = pm.gp.mean.Zero() M = theano.function([], zero_mean(X))() - assert np.all(M==0) - assert M.shape == (10, ) + assert np.all(M == 0) + assert M.shape == (10,) class TestConstantMean(object): @@ -28,8 +28,8 @@ def test_value(self): with pm.Model() as model: const_mean = pm.gp.mean.Constant(6) M = theano.function([], const_mean(X))() - assert np.all(M==6) - assert M.shape == (10, ) + assert np.all(M == 6) + assert M.shape == (10,) class TestLinearMean(object): @@ -39,7 +39,7 @@ def test_value(self): linear_mean = pm.gp.mean.Linear(2, 0.5) M = theano.function([], linear_mean(X))() npt.assert_allclose(M[1], 0.7222, atol=1e-3) - assert M.shape == (10, ) + assert M.shape == (10,) class TestAddProdMean(object): @@ -212,8 +212,16 @@ def test_multiops(self): X = np.linspace(0, 1, 3)[:, None] M = np.array([[1, 2, 3], [2, 1, 2], [3, 2, 1]]) with pm.Model() as model: - cov1 = 3 + pm.gp.cov.ExpQuad(1, 0.1) + M * pm.gp.cov.ExpQuad(1, 0.1) * M * pm.gp.cov.ExpQuad(1, 0.1) - cov2 = pm.gp.cov.ExpQuad(1, 0.1) * M * pm.gp.cov.ExpQuad(1, 0.1) * M + pm.gp.cov.ExpQuad(1, 0.1) + 3 + cov1 = ( + 3 + + pm.gp.cov.ExpQuad(1, 0.1) + + M * pm.gp.cov.ExpQuad(1, 0.1) * M * pm.gp.cov.ExpQuad(1, 0.1) + ) + cov2 = ( + pm.gp.cov.ExpQuad(1, 0.1) * M * pm.gp.cov.ExpQuad(1, 0.1) * M + + pm.gp.cov.ExpQuad(1, 0.1) + + 3 + ) K1 = theano.function([], cov1(X))() K2 = theano.function([], cov2(X))() assert np.allclose(K1, K2) @@ -247,10 +255,16 @@ def test_multiops(self): X2 = cartesian(X21, X22) X = cartesian(X1, X21, X22) with pm.Model() as model: - cov1 = 3 + pm.gp.cov.ExpQuad(1, 0.1) + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1) + cov1 = ( + 3 + + pm.gp.cov.ExpQuad(1, 0.1) + + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1) + ) cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K_true = kronecker(theano.function([], cov1(X1))(), theano.function([], cov2(X2))()).eval() + K_true = kronecker( + theano.function([], cov1(X1))(), theano.function([], cov2(X2))() + ).eval() K = theano.function([], cov(X))() npt.assert_allclose(K_true, K) @@ -269,7 +283,7 @@ def test_slice1(self): def test_slice2(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: - cov = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1,2]) + cov = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1, 2]) K = theano.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3) # check diagonal @@ -279,7 +293,7 @@ def test_slice2(self): def test_slice3(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: - cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1,2]) + cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1, 2]) K = theano.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3) # check diagonal @@ -289,7 +303,9 @@ def test_slice3(self): def test_diffslice(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: - cov = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 0, 0]) + pm.gp.cov.ExpQuad(3, ls=[0.1, 0.2, 0.3]) + cov = pm.gp.cov.ExpQuad( + 3, ls=0.1, active_dims=[1, 0, 0] + ) + pm.gp.cov.ExpQuad(3, ls=[0.1, 0.2, 0.3]) K = theano.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.683572, atol=1e-3) # check diagonal @@ -305,7 +321,7 @@ def test_raises(self): class TestStability(object): def test_stable(self): - X = np.random.uniform(low=320., high=400., size=[2000, 2]) + X = np.random.uniform(low=320.0, high=400.0, size=[2000, 2]) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.1) dists = theano.function([], cov.square_dist(X, X))() @@ -365,7 +381,7 @@ def test_1d(self): cov = pm.gp.cov.WhiteNoise(sigma=0.5) K = theano.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) - npt.assert_allclose(K[0, 0], 0.5**2, atol=1e-3) + npt.assert_allclose(K[0, 0], 0.5 ** 2, atol=1e-3) # check diagonal Kd = theano.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -507,11 +523,15 @@ def test_1d(self): class TestWarpedInput(object): def test_1d(self): X = np.linspace(0, 1, 10)[:, None] + def warp_func(x, a, b, c): return x + (a * tt.tanh(b * (x - c))) + with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) - cov = pm.gp.cov.WarpedInput(1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52) + cov = pm.gp.cov.WarpedInput( + 1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52 + ) K = theano.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) K = theano.function([], cov(X, X))() @@ -531,8 +551,10 @@ def test_raises(self): class TestGibbs(object): def test_1d(self): X = np.linspace(0, 2, 10)[:, None] + def tanh_func(x, x1, x2, w, x0): return (x1 + x2) / 2.0 - (x1 - x2) / 2.0 * tt.tanh((x - x0) / w) + with pm.Model() as model: cov = pm.gp.cov.Gibbs(1, tanh_func, args=(0.05, 0.6, 0.4, 1.0)) K = theano.function([], cov(X))() @@ -549,17 +571,21 @@ def test_raises(self): with pytest.raises(NotImplementedError): pm.gp.cov.Gibbs(2, lambda x: x) with pytest.raises(NotImplementedError): - pm.gp.cov.Gibbs(3, lambda x: x, active_dims=[0,1]) + pm.gp.cov.Gibbs(3, lambda x: x, active_dims=[0, 1]) class TestScaledCov(object): def test_1d(self): X = np.linspace(0, 1, 10)[:, None] + def scaling_func(x, a, b): - return a + b*x + return a + b * x + with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) - cov = pm.gp.cov.ScaledCov(1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52) + cov = pm.gp.cov.ScaledCov( + 1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52 + ) K = theano.function([], cov(X))() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) K = theano.function([], cov(X, X))() @@ -580,10 +606,13 @@ class TestHandleArgs(object): def test_handleargs(self): def func_noargs(x): return x + def func_onearg(x, a): return x + a + def func_twoarg(x, a, b): return x + a + b + x = 100 a = 2 b = 3 @@ -612,19 +641,15 @@ def test_full(self): with pm.Model() as model: B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) npt.assert_allclose( - B(np.array([[2, 1.5], [3, -42]])).eval(), - self.B[2:4, 2:4] - ) + B(np.array([[2, 1.5], [3, -42]])).eval(), self.B[2:4, 2:4] + ) npt.assert_allclose(B(self.X).eval(), B_mat) def test_fullB(self): B_mat = self.B[self.rand_rows, self.rand_rows.T] with pm.Model() as model: B = pm.gp.cov.Coregion(1, B=self.B) - npt.assert_allclose( - B(np.array([[2], [3]])).eval(), - self.B[2:4, 2:4] - ) + npt.assert_allclose(B(np.array([[2], [3]])).eval(), self.B[2:4, 2:4]) npt.assert_allclose(B(self.X).eval(), B_mat) def test_Xs(self): @@ -632,9 +657,8 @@ def test_Xs(self): with pm.Model() as model: B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) npt.assert_allclose( - B(np.array([[2, 1.5]]), np.array([[3, -42]])).eval(), - self.B[2, 3] - ) + B(np.array([[2, 1.5]]), np.array([[3, -42]])).eval(), self.B[2, 3] + ) npt.assert_allclose(B(self.X, self.Xs).eval(), B_mat) def test_diag(self): @@ -642,9 +666,8 @@ def test_diag(self): with pm.Model() as model: B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) npt.assert_allclose( - B(np.array([[2, 1.5]]), diag=True).eval(), - np.diag(self.B)[2] - ) + B(np.array([[2, 1.5]]), diag=True).eval(), np.diag(self.B)[2] + ) npt.assert_allclose(B(self.X, diag=True).eval(), B_diag) def test_raises(self): @@ -664,19 +687,22 @@ def test_raises3(self): class TestMarginalVsLatent(object): - R""" + r""" Compare the logp of models Marginal, noise=0 and Latent. """ + def setup_method(self): - X = np.random.randn(50,3) - y = np.random.randn(50)*0.01 + X = np.random.randn(50, 3) + y = np.random.randn(50) * 0.01 Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60)*0.01 + pnew = np.random.randn(60) * 0.01 with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.Marginal(mean_func, cov_func) - f = gp.marginal_likelihood("f", X, y, noise=0.0, is_observed=False, observed=y) + f = gp.marginal_likelihood( + "f", X, y, noise=0.0, is_observed=False, observed=y + ) p = gp.conditional("p", Xnew) self.logp = model.logp({"p": pnew}) self.X = X @@ -708,15 +734,16 @@ def testLatent2(self): class TestMarginalVsMarginalSparse(object): - R""" + r""" Compare logp of models Marginal and MarginalSparse. Should be nearly equal when inducing points are same as inputs. """ + def setup_method(self): - X = np.random.randn(50,3) - y = np.random.randn(50)*0.01 + X = np.random.randn(50, 3) + y = np.random.randn(50) * 0.01 Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60)*0.01 + pnew = np.random.randn(60) * 0.01 with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) @@ -732,7 +759,7 @@ def setup_method(self): self.pnew = pnew self.gp = gp - @pytest.mark.parametrize('approx', ['FITC', 'VFE', 'DTC']) + @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"]) def testApproximations(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) @@ -743,7 +770,7 @@ def testApproximations(self, approx): approx_logp = model.logp({"f": self.y, "p": self.pnew}) npt.assert_allclose(approx_logp, self.logp, atol=0, rtol=1e-2) - @pytest.mark.parametrize('approx', ['FITC', 'VFE', 'DTC']) + @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"]) def testPredictVar(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) @@ -760,7 +787,9 @@ def testPredictCov(self): cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.MarginalSparse(mean_func, cov_func, approx="DTC") - f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) + f = gp.marginal_likelihood( + "f", self.X, self.X, self.y, self.sigma, is_observed=False + ) mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) @@ -769,16 +798,20 @@ def testPredictCov(self): class TestGPAdditive(object): def setup_method(self): - self.X = np.random.randn(50,3) - self.y = np.random.randn(50)*0.01 + self.X = np.random.randn(50, 3) + self.y = np.random.randn(50) * 0.01 self.Xnew = np.random.randn(60, 3) self.noise = pm.gp.cov.WhiteNoise(0.1) - self.covs = (pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]), - pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]), - pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])) - self.means = (pm.gp.mean.Constant(0.5), - pm.gp.mean.Constant(0.5), - pm.gp.mean.Constant(0.5)) + self.covs = ( + pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]), + pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]), + pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]), + ) + self.means = ( + pm.gp.mean.Constant(0.5), + pm.gp.mean.Constant(0.5), + pm.gp.mean.Constant(0.5), + ) def testAdditiveMarginal(self): with pm.Model() as model1: @@ -797,15 +830,20 @@ def testAdditiveMarginal(self): npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) with model1: - fp1 = gpsum.conditional("fp1", self.Xnew, given={"X": self.X, "y": self.y, - "noise": self.noise, "gp": gpsum}) + fp1 = gpsum.conditional( + "fp1", + self.Xnew, + given={"X": self.X, "y": self.y, "noise": self.noise, "gp": gpsum}, + ) with model2: fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2) + npt.assert_allclose( + fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2 + ) - @pytest.mark.parametrize('approx', ['FITC', 'VFE', 'DTC']) + @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"]) def testAdditiveMarginalSparse(self, approx): Xu = np.random.randn(10, 3) sigma = 0.1 @@ -819,19 +857,26 @@ def testAdditiveMarginalSparse(self, approx): model1_logp = model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.MarginalSparse(reduce(add, self.means), reduce(add, self.covs), approx=approx) + gptot = pm.gp.MarginalSparse( + reduce(add, self.means), reduce(add, self.covs), approx=approx + ) fsum = gptot.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) with model1: - fp1 = gpsum.conditional("fp1", self.Xnew, given={"X": self.X, "Xu": Xu, "y": self.y, - "sigma": sigma, "gp": gpsum}) + fp1 = gpsum.conditional( + "fp1", + self.Xnew, + given={"X": self.X, "Xu": Xu, "y": self.y, "sigma": sigma, "gp": gpsum}, + ) with model2: fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2) + npt.assert_allclose( + fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2 + ) def testAdditiveLatent(self): with pm.Model() as model1: @@ -850,13 +895,19 @@ def testAdditiveLatent(self): npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) with model1: - fp1 = gpsum.conditional("fp1", self.Xnew, given={"X": self.X, "f": self.y, "gp": gpsum}) + fp1 = gpsum.conditional( + "fp1", self.Xnew, given={"X": self.X, "f": self.y, "gp": gpsum} + ) with model2: fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose(fp1.logp({"fsum": self.y, "fp1": fp}), - fp2.logp({"fsum": self.y, "fp2": fp}), atol=0, rtol=1e-2) + npt.assert_allclose( + fp1.logp({"fsum": self.y, "fp1": fp}), + fp2.logp({"fsum": self.y, "fp2": fp}), + atol=0, + rtol=1e-2, + ) def testAdditiveSparseRaises(self): # cant add different approximations @@ -885,14 +936,15 @@ def testAdditiveTypeRaises2(self): class TestTP(object): - R""" + r""" Compare TP with high degress of freedom to GP """ + def setup_method(self): - X = np.random.randn(20,3) - y = np.random.randn(20)*0.01 + X = np.random.randn(20, 3) + y = np.random.randn(20) * 0.01 Xnew = np.random.randn(50, 3) - pnew = np.random.randn(50)*0.01 + pnew = np.random.randn(50) * 0.01 with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = pm.gp.Latent(cov_func=cov_func) @@ -923,7 +975,9 @@ def testTPvsLatentReparameterized(self): chol = np.linalg.cholesky(cov_func(self.X).eval()) y_rotated = np.linalg.solve(chol, self.y) # testing full model logp unreliable due to introduction of chi2__log__ - plogp = p.logp({"f_rotated_": y_rotated, "p": self.pnew, "chi2__log__": np.log(1e20)}) + plogp = p.logp( + {"f_rotated_": y_rotated, "p": self.pnew, "chi2__log__": np.log(1e20)} + ) npt.assert_allclose(self.plogp, plogp, atol=0, rtol=1e-2) def testAdditiveTPRaises(self): @@ -939,23 +993,30 @@ class TestLatentKron(object): """ Compare gp.LatentKron to gp.Latent, both with Gaussian noise. """ + def setup_method(self): - self.Xs = [np.linspace(0, 1, 7)[:, None], - np.linspace(0, 1, 5)[:, None], - np.linspace(0, 1, 6)[:, None]] + self.Xs = [ + np.linspace(0, 1, 7)[:, None], + np.linspace(0, 1, 5)[:, None], + np.linspace(0, 1, 6)[:, None], + ] self.X = cartesian(*self.Xs) self.N = np.prod([len(X) for X in self.Xs]) self.y = np.random.randn(self.N) * 0.1 - self.Xnews = (np.random.randn(5, 1), - np.random.randn(5, 1), - np.random.randn(5, 1)) + self.Xnews = ( + np.random.randn(5, 1), + np.random.randn(5, 1), + np.random.randn(5, 1), + ) self.Xnew = np.concatenate(self.Xnews, axis=1) - self.pnew = np.random.randn(len(self.Xnew))*0.01 + self.pnew = np.random.randn(len(self.Xnew)) * 0.01 ls = 0.2 with pm.Model() as latent_model: - self.cov_funcs = (pm.gp.cov.ExpQuad(1, ls), - pm.gp.cov.ExpQuad(1, ls), - pm.gp.cov.ExpQuad(1, ls)) + self.cov_funcs = ( + pm.gp.cov.ExpQuad(1, ls), + pm.gp.cov.ExpQuad(1, ls), + pm.gp.cov.ExpQuad(1, ls), + ) cov_func = pm.gp.cov.Kron(self.cov_funcs) self.mean = pm.gp.mean.Constant(0.5) gp = pm.gp.Latent(mean_func=self.mean, cov_func=cov_func) @@ -967,53 +1028,59 @@ def setup_method(self): def testLatentKronvsLatent(self): with pm.Model() as kron_model: - kron_gp = pm.gp.LatentKron(mean_func=self.mean, - cov_funcs=self.cov_funcs) - f = kron_gp.prior('f', self.Xs) - p = kron_gp.conditional('p', self.Xnew) - kronlatent_logp = kron_model.logp({"f_rotated_": self.y_rotated, "p": self.pnew}) + kron_gp = pm.gp.LatentKron(mean_func=self.mean, cov_funcs=self.cov_funcs) + f = kron_gp.prior("f", self.Xs) + p = kron_gp.conditional("p", self.Xnew) + kronlatent_logp = kron_model.logp( + {"f_rotated_": self.y_rotated, "p": self.pnew} + ) npt.assert_allclose(kronlatent_logp, self.logp, atol=0, rtol=1e-3) def testLatentKronRaisesAdditive(self): with pm.Model() as kron_model: - gp1 = pm.gp.LatentKron(mean_func=self.mean, - cov_funcs=self.cov_funcs) - gp2 = pm.gp.LatentKron(mean_func=self.mean, - cov_funcs=self.cov_funcs) + gp1 = pm.gp.LatentKron(mean_func=self.mean, cov_funcs=self.cov_funcs) + gp2 = pm.gp.LatentKron(mean_func=self.mean, cov_funcs=self.cov_funcs) with pytest.raises(TypeError): gp1 + gp2 def testLatentKronRaisesSizes(self): with pm.Model() as kron_model: - gp = pm.gp.LatentKron(mean_func=self.mean, - cov_funcs=self.cov_funcs) + gp = pm.gp.LatentKron(mean_func=self.mean, cov_funcs=self.cov_funcs) with pytest.raises(ValueError): - gp.prior("f", Xs=[np.linspace(0, 1, 7)[:, None], - np.linspace(0, 1, 5)[:, None]]) + gp.prior( + "f", Xs=[np.linspace(0, 1, 7)[:, None], np.linspace(0, 1, 5)[:, None]] + ) class TestMarginalKron(object): """ Compare gp.MarginalKron to gp.Marginal. """ + def setup_method(self): - self.Xs = [np.linspace(0, 1, 7)[:, None], - np.linspace(0, 1, 5)[:, None], - np.linspace(0, 1, 6)[:, None]] + self.Xs = [ + np.linspace(0, 1, 7)[:, None], + np.linspace(0, 1, 5)[:, None], + np.linspace(0, 1, 6)[:, None], + ] self.X = cartesian(*self.Xs) self.N = np.prod([len(X) for X in self.Xs]) self.y = np.random.randn(self.N) * 0.1 - self.Xnews = (np.random.randn(5, 1), - np.random.randn(5, 1), - np.random.randn(5, 1)) + self.Xnews = ( + np.random.randn(5, 1), + np.random.randn(5, 1), + np.random.randn(5, 1), + ) self.Xnew = np.concatenate(self.Xnews, axis=1) self.sigma = 0.2 - self.pnew = np.random.randn(len(self.Xnew))*0.01 + self.pnew = np.random.randn(len(self.Xnew)) * 0.01 ls = 0.2 with pm.Model() as model: - self.cov_funcs = [pm.gp.cov.ExpQuad(1, ls), - pm.gp.cov.ExpQuad(1, ls), - pm.gp.cov.ExpQuad(1, ls)] + self.cov_funcs = [ + pm.gp.cov.ExpQuad(1, ls), + pm.gp.cov.ExpQuad(1, ls), + pm.gp.cov.ExpQuad(1, ls), + ] cov_func = pm.gp.cov.Kron(self.cov_funcs) self.mean = pm.gp.mean.Constant(0.5) gp = pm.gp.Marginal(mean_func=self.mean, cov_func=cov_func) @@ -1024,30 +1091,28 @@ def setup_method(self): def testMarginalKronvsMarginalpredict(self): with pm.Model() as kron_model: - kron_gp = pm.gp.MarginalKron(mean_func=self.mean, - cov_funcs=self.cov_funcs) - f = kron_gp.marginal_likelihood('f', self.Xs, self.y, - sigma=self.sigma, shape=self.N) - p = kron_gp.conditional('p', self.Xnew) + kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) + f = kron_gp.marginal_likelihood( + "f", self.Xs, self.y, sigma=self.sigma, shape=self.N + ) + p = kron_gp.conditional("p", self.Xnew) mu, cov = kron_gp.predict(self.Xnew) npt.assert_allclose(mu, self.mu, atol=0, rtol=1e-2) npt.assert_allclose(cov, self.cov, atol=0, rtol=1e-2) def testMarginalKronvsMarginal(self): with pm.Model() as kron_model: - kron_gp = pm.gp.MarginalKron(mean_func=self.mean, - cov_funcs=self.cov_funcs) - f = kron_gp.marginal_likelihood('f', self.Xs, self.y, - sigma=self.sigma, shape=self.N) - p = kron_gp.conditional('p', self.Xnew) - kron_logp = kron_model.logp({'p': self.pnew}) + kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) + f = kron_gp.marginal_likelihood( + "f", self.Xs, self.y, sigma=self.sigma, shape=self.N + ) + p = kron_gp.conditional("p", self.Xnew) + kron_logp = kron_model.logp({"p": self.pnew}) npt.assert_allclose(kron_logp, self.logp, atol=0, rtol=1e-2) def testMarginalKronRaises(self): with pm.Model() as kron_model: - gp1 = pm.gp.MarginalKron(mean_func=self.mean, - cov_funcs=self.cov_funcs) - gp2 = pm.gp.MarginalKron(mean_func=self.mean, - cov_funcs=self.cov_funcs) + gp1 = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) + gp2 = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) with pytest.raises(TypeError): gp1 + gp2 diff --git a/pymc3/tests/test_hdf5_backend.py b/pymc3/tests/test_hdf5_backend.py index ddbf58b08a..1b782e84db 100644 --- a/pymc3/tests/test_hdf5_backend.py +++ b/pymc3/tests/test_hdf5_backend.py @@ -4,19 +4,12 @@ import os import tempfile -STATS1 = [{ - 'a': np.float64, - 'b': np.bool -}] - -STATS2 = [{ - 'a': np.float64 -}, { - 'a': np.float64, - 'b': np.int64, -}] - -DBNAME = os.path.join(tempfile.gettempdir(), 'test.h5') +STATS1 = [{"a": np.float64, "b": np.bool}] + +STATS2 = [{"a": np.float64}, {"a": np.float64, "b": np.int64}] + +DBNAME = os.path.join(tempfile.gettempdir(), "test.h5") + class TestHDF50dSampling(bf.SamplingTestCase): backend = hdf5.HDF5 diff --git a/pymc3/tests/test_hmc.py b/pymc3/tests/test_hmc.py index ed50a0494e..5cfc8c8189 100644 --- a/pymc3/tests/test_hmc.py +++ b/pymc3/tests/test_hmc.py @@ -7,7 +7,9 @@ import pytest import logging from pymc3.theanof import floatX -logger = logging.getLogger('pymc3') + +logger = logging.getLogger("pymc3") + def test_leapfrog_reversible(): n = 3 @@ -20,7 +22,7 @@ def test_leapfrog_reversible(): p = floatX(step.potential.random()) q = floatX(np.random.randn(size)) start = step.integrator.compute_state(p, q) - for epsilon in [.01, .1]: + for epsilon in [0.01, 0.1]: for n_steps in [1, 2, 3, 4, 20]: state = start for _ in range(n_steps): @@ -39,14 +41,17 @@ def test_nuts_tuning(): trace = pymc3.sample(10, step=step, tune=5, progressbar=False, chains=1) assert not step.tune - assert np.all(trace['step_size'][5:] == trace['step_size'][5]) + assert np.all(trace["step_size"][5:] == trace["step_size"][5]) + def test_nuts_error_reporting(caplog): model = pymc3.Model() with caplog.at_level(logging.ERROR) and pytest.raises(ValueError): with model: - pymc3.HalfNormal('a', sd=1, transform=None, testval=-1) - pymc3.HalfNormal('b', sd=1, transform=None) - trace = pymc3.sample(init='adapt_diag', chains=1) - assert "Bad initial energy, check any log probabilities that are inf or -inf: a -inf\nb" in caplog.text - + pymc3.HalfNormal("a", sd=1, transform=None, testval=-1) + pymc3.HalfNormal("b", sd=1, transform=None) + trace = pymc3.sample(init="adapt_diag", chains=1) + assert ( + "Bad initial energy, check any log probabilities that are inf or -inf: a -inf\nb" + in caplog.text + ) diff --git a/pymc3/tests/test_math.py b/pymc3/tests/test_math.py index b231b08127..a90b039ffe 100644 --- a/pymc3/tests/test_math.py +++ b/pymc3/tests/test_math.py @@ -4,8 +4,18 @@ import theano.tensor as tt from theano.tests import unittest_tools as utt from pymc3.math import ( - LogDet, logdet, probit, invprobit, expand_packed_triangular, - log1pexp, log1mexp, kronecker, cartesian, kron_dot, kron_solve_lower) + LogDet, + logdet, + probit, + invprobit, + expand_packed_triangular, + log1pexp, + log1mexp, + kronecker, + cartesian, + kron_dot, + kron_solve_lower, +) from .helpers import SeededTest import pytest from pymc3.theanof import floatX @@ -14,14 +24,13 @@ def test_kronecker(): np.random.seed(1) # Create random matrices - [a, b, c] = [np.random.rand(3, 3+i) for i in range(3)] + [a, b, c] = [np.random.rand(3, 3 + i) for i in range(3)] - custom = kronecker(a, b, c) # Custom version + custom = kronecker(a, b, c) # Custom version nested = tt.slinalg.kron(a, tt.slinalg.kron(b, c)) np.testing.assert_array_almost_equal( - custom.eval(), - nested.eval() # Standard nested version - ) + custom.eval(), nested.eval() # Standard nested version + ) def test_cartesian(): @@ -30,20 +39,21 @@ def test_cartesian(): b = [0, 2] c = [5, 6] manual_cartesian = np.array( - [[1, 0, 5], - [1, 0, 6], - [1, 2, 5], - [1, 2, 6], - [2, 0, 5], - [2, 0, 6], - [2, 2, 5], - [2, 2, 6], - [3, 0, 5], - [3, 0, 6], - [3, 2, 5], - [3, 2, 6], - ] - ) + [ + [1, 0, 5], + [1, 0, 6], + [1, 2, 5], + [1, 2, 6], + [2, 0, 5], + [2, 0, 6], + [2, 2, 5], + [2, 2, 6], + [3, 0, 5], + [3, 0, 6], + [3, 2, 5], + [3, 2, 6], + ] + ) auto_cart = cartesian(a, b, c) np.testing.assert_array_almost_equal(manual_cartesian, auto_cart) @@ -88,16 +98,19 @@ def test_log1pexp(): # import mpmath # mpmath.mp.dps = 1000 # [float(mpmath.log(1 + mpmath.exp(x))) for x in vals] - expected = np.array([ - 0.0, - 3.720075976020836e-44, - 4.539889921686465e-05, - 0.6930971818099453, - 0.6931471805599453, - 0.6931971818099453, - 10.000045398899218, - 100.0, - 1e+20]) + expected = np.array( + [ + 0.0, + 3.720075976020836e-44, + 4.539889921686465e-05, + 0.6930971818099453, + 0.6931471805599453, + 0.6931971818099453, + 10.000045398899218, + 100.0, + 1e20, + ] + ) actual = log1pexp(vals).eval() npt.assert_allclose(actual, expected) @@ -107,14 +120,17 @@ def test_log1mexp(): # import mpmath # mpmath.mp.dps = 1000 # [float(mpmath.log(1 - mpmath.exp(-x))) for x in vals] - expected = np.array([ - np.nan, - -np.inf, - -46.051701859880914, - -9.210390371559516, - -4.540096037048921e-05, - -3.720075976020836e-44, - 0.0]) + expected = np.array( + [ + np.nan, + -np.inf, + -46.051701859880914, + -9.210390371559516, + -4.540096037048921e-05, + -3.720075976020836e-44, + 0.0, + ] + ) actual = log1mexp(vals).eval() npt.assert_allclose(actual, expected) @@ -140,8 +156,10 @@ def validate(self, input_mat): # Test gradient: utt.verify_grad(self.op, [input_mat]) - @pytest.mark.skipif(theano.config.device in ["cuda", "gpu"], - reason="No logDet implementation on GPU.") + @pytest.mark.skipif( + theano.config.device in ["cuda", "gpu"], + reason="No logDet implementation on GPU.", + ) def test_basic(self): # Calls validate with different params test_case_1 = np.random.randn(3, 3) / np.sqrt(3) @@ -152,11 +170,11 @@ def test_basic(self): def test_expand_packed_triangular(): with pytest.raises(ValueError): - x = tt.matrix('x') - x.tag.test_value = np.array([[1.]]) + x = tt.matrix("x") + x.tag.test_value = np.array([[1.0]]) expand_packed_triangular(5, x) N = 5 - packed = tt.vector('packed') + packed = tt.vector("packed") packed.tag.test_value = floatX(np.zeros(N * (N + 1) // 2)) with pytest.raises(TypeError): expand_packed_triangular(packed.shape[0], packed) @@ -168,9 +186,17 @@ def test_expand_packed_triangular(): upper_packed = floatX(vals[upper != 0]) expand_lower = expand_packed_triangular(N, packed, lower=True) expand_upper = expand_packed_triangular(N, packed, lower=False) - expand_diag_lower = expand_packed_triangular(N, packed, lower=True, diagonal_only=True) - expand_diag_upper = expand_packed_triangular(N, packed, lower=False, diagonal_only=True) + expand_diag_lower = expand_packed_triangular( + N, packed, lower=True, diagonal_only=True + ) + expand_diag_upper = expand_packed_triangular( + N, packed, lower=False, diagonal_only=True + ) assert np.all(expand_lower.eval({packed: lower_packed}) == lower) assert np.all(expand_upper.eval({packed: upper_packed}) == upper) - assert np.all(expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals))) - assert np.all(expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals))) + assert np.all( + expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals)) + ) + assert np.all( + expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals)) + ) diff --git a/pymc3/tests/test_memo.py b/pymc3/tests/test_memo.py index 395595678b..51621f2845 100644 --- a/pymc3/tests/test_memo.py +++ b/pymc3/tests/test_memo.py @@ -3,14 +3,15 @@ def getmemo(): @memoize - def f(a, b=('a')): + def f(a, b=("a")): return str(a) + str(b) + return f def test_memo(): f = getmemo() - assert f('x', ['y', 'z']) == "x['y', 'z']" - assert f('x', ['a', 'z']) == "x['a', 'z']" - assert f('x', ['y', 'z']) == "x['y', 'z']" + assert f("x", ["y", "z"]) == "x['y', 'z']" + assert f("x", ["a", "z"]) == "x['a', 'z']" + assert f("x", ["y", "z"]) == "x['y', 'z']" diff --git a/pymc3/tests/test_minibatches.py b/pymc3/tests/test_minibatches.py index 01f4d13361..c93e2a134a 100644 --- a/pymc3/tests/test_minibatches.py +++ b/pymc3/tests/test_minibatches.py @@ -17,8 +17,9 @@ class _DataSampler(object): """ Not for users """ - def __init__(self, data, batchsize=50, random_seed=42, dtype='floatX'): - self.dtype = theano.config.floatX if dtype == 'floatX' else dtype + + def __init__(self, data, batchsize=50, random_seed=42, dtype="floatX"): + self.dtype = theano.config.floatX if dtype == "floatX" else dtype self.rng = np.random.RandomState(random_seed) self.data = data self.n = batchsize @@ -27,17 +28,15 @@ def __iter__(self): return self def __next__(self): - idx = (self.rng - .uniform(size=self.n, - low=0.0, - high=self.data.shape[0] - 1e-16) - .astype('int64')) + idx = self.rng.uniform( + size=self.n, low=0.0, high=self.data.shape[0] - 1e-16 + ).astype("int64") return np.asarray(self.data[idx], self.dtype) next = __next__ -@pytest.fixture('module') +@pytest.fixture("module") def datagen(): return _DataSampler(np.random.uniform(size=(1000, 10))) @@ -56,9 +55,8 @@ def integers_ndim(ndim): i += 1 -@pytest.mark.usefixtures('strict_float32') +@pytest.mark.usefixtures("strict_float32") class TestGenerator(object): - def test_basic(self): generator = GeneratorAdapter(integers()) gop = GeneratorOp(generator)() @@ -130,7 +128,7 @@ def test_gen_cloning_with_shape_change(self, datagen): res, _ = theano.scan(lambda x: x.sum(), X, n_steps=X.shape[0]) assert res.eval().shape == (50,) shared = theano.shared(datagen.data.astype(gen.dtype)) - res2 = theano.clone(res, {gen: shared**2}) + res2 = theano.clone(res, {gen: shared ** 2}) assert res2.eval().shape == (1000,) @@ -152,13 +150,14 @@ class TestScaling(object): """ Related to minibatch training """ + def test_density_scaling(self): with pm.Model() as model1: - Normal('n', observed=[[1]], total_size=1) + Normal("n", observed=[[1]], total_size=1) p1 = theano.function([], model1.logpt) with pm.Model() as model2: - Normal('n', observed=[[1]], total_size=2) + Normal("n", observed=[[1]], total_size=2) p2 = theano.function([], model2.logpt) assert p1() * 2 == p2() @@ -169,15 +168,16 @@ def true_dens(): g = gen1() for i, point in enumerate(g): yield stats.norm.logpdf(point).sum() * 10 + t = true_dens() # We have same size models with pm.Model() as model1: - Normal('n', observed=gen1(), total_size=100) + Normal("n", observed=gen1(), total_size=100) p1 = theano.function([], model1.logpt) with pm.Model() as model2: gen_var = generator(gen2()) - Normal('n', observed=gen_var, total_size=100) + Normal("n", observed=gen_var, total_size=100) p2 = theano.function([], model2.logpt) for i in range(10): @@ -190,13 +190,13 @@ def true_dens(): def test_gradient_with_scaling(self): with pm.Model() as model1: genvar = generator(gen1()) - m = Normal('m') - Normal('n', observed=genvar, total_size=1000) + m = Normal("m") + Normal("n", observed=genvar, total_size=1000) grad1 = theano.function([m], tt.grad(model1.logpt, m)) with pm.Model() as model2: - m = Normal('m') + m = Normal("m") shavar = theano.shared(np.ones((1000, 100))) - Normal('n', observed=shavar) + Normal("n", observed=shavar) grad2 = theano.function([m], tt.grad(model2.logpt, m)) for i in range(10): @@ -207,78 +207,78 @@ def test_gradient_with_scaling(self): def test_multidim_scaling(self): with pm.Model() as model0: - Normal('n', observed=[[1, 1], - [1, 1]], total_size=[]) + Normal("n", observed=[[1, 1], [1, 1]], total_size=[]) p0 = theano.function([], model0.logpt) with pm.Model() as model1: - Normal('n', observed=[[1, 1], - [1, 1]], total_size=[2, 2]) + Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2]) p1 = theano.function([], model1.logpt) with pm.Model() as model2: - Normal('n', observed=[[1], - [1]], total_size=[2, 2]) + Normal("n", observed=[[1], [1]], total_size=[2, 2]) p2 = theano.function([], model2.logpt) with pm.Model() as model3: - Normal('n', observed=[[1, 1]], total_size=[2, 2]) + Normal("n", observed=[[1, 1]], total_size=[2, 2]) p3 = theano.function([], model3.logpt) with pm.Model() as model4: - Normal('n', observed=[[1]], total_size=[2, 2]) + Normal("n", observed=[[1]], total_size=[2, 2]) p4 = theano.function([], model4.logpt) with pm.Model() as model5: - Normal('n', observed=[[1]], total_size=[2, Ellipsis, 2]) + Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2]) p5 = theano.function([], model5.logpt) assert p0() == p1() == p2() == p3() == p4() == p5() def test_common_errors(self): with pm.Model(): with pytest.raises(ValueError) as e: - Normal('n', observed=[[1]], total_size=[2, Ellipsis, 2, 2]) - assert 'Length of' in str(e.value) + Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2, 2]) + assert "Length of" in str(e.value) with pytest.raises(ValueError) as e: - Normal('n', observed=[[1]], total_size=[2, 2, 2]) - assert 'Length of' in str(e.value) + Normal("n", observed=[[1]], total_size=[2, 2, 2]) + assert "Length of" in str(e.value) with pytest.raises(TypeError) as e: - Normal('n', observed=[[1]], total_size='foo') - assert 'Unrecognized' in str(e.value) + Normal("n", observed=[[1]], total_size="foo") + assert "Unrecognized" in str(e.value) with pytest.raises(TypeError) as e: - Normal('n', observed=[[1]], total_size=['foo']) - assert 'Unrecognized' in str(e.value) + Normal("n", observed=[[1]], total_size=["foo"]) + assert "Unrecognized" in str(e.value) with pytest.raises(ValueError) as e: - Normal('n', observed=[[1]], total_size=[Ellipsis, Ellipsis]) - assert 'Double Ellipsis' in str(e.value) + Normal("n", observed=[[1]], total_size=[Ellipsis, Ellipsis]) + assert "Double Ellipsis" in str(e.value) def test_mixed1(self): with pm.Model(): data = np.random.rand(10, 20, 30, 40, 50) mb = pm.Minibatch(data, [2, None, 20, Ellipsis, 10]) - Normal('n', observed=mb, total_size=(10, None, 30, Ellipsis, 50)) + Normal("n", observed=mb, total_size=(10, None, 30, Ellipsis, 50)) def test_mixed2(self): with pm.Model(): data = np.random.rand(10, 20, 30, 40, 50) mb = pm.Minibatch(data, [2, None, 20]) - Normal('n', observed=mb, total_size=(10, None, 30)) + Normal("n", observed=mb, total_size=(10, None, 30)) def test_free_rv(self): with pm.Model() as model4: - Normal('n', observed=[[1, 1], - [1, 1]], total_size=[2, 2]) + Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2]) p4 = theano.function([], model4.logpt) with pm.Model() as model5: - Normal('n', total_size=[2, Ellipsis, 2], shape=(1, 1), broadcastable=(False, False)) + Normal( + "n", + total_size=[2, Ellipsis, 2], + shape=(1, 1), + broadcastable=(False, False), + ) p5 = theano.function([model5.n], model5.logpt) assert p4() == p5(pm.floatX([[1]])) - assert p4() == p5(pm.floatX([[1, 1], - [1, 1]])) + assert p4() == p5(pm.floatX([[1, 1], [1, 1]])) -@pytest.mark.usefixtures('strict_float32') +@pytest.mark.usefixtures("strict_float32") class TestMinibatch(object): data = np.random.rand(30, 10, 40, 10, 50) diff --git a/pymc3/tests/test_missing.py b/pymc3/tests/test_missing.py index 1258ed3ea2..0da46ef27d 100644 --- a/pymc3/tests/test_missing.py +++ b/pymc3/tests/test_missing.py @@ -7,8 +7,8 @@ def test_missing(): data = ma.masked_values([1, 2, -1, 4, -1], value=-1) with Model() as model: - x = Normal('x', 1, 1) - Normal('y', x, 1, observed=data) + x = Normal("x", 1, 1) + Normal("y", x, 1, observed=data) y_missing, = model.missing_values assert y_missing.tag.test_value.shape == (2,) @@ -19,8 +19,8 @@ def test_missing(): def test_missing_pandas(): data = pd.DataFrame([1, 2, numpy.nan, 4, numpy.nan]) with Model() as model: - x = Normal('x', 1, 1) - Normal('y', x, 1, observed=data) + x = Normal("x", 1, 1) + Normal("y", x, 1, observed=data) y_missing, = model.missing_values assert y_missing.tag.test_value.shape == (2,) diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py index 7ee53a8753..d4dea35970 100644 --- a/pymc3/tests/test_mixture.py +++ b/pymc3/tests/test_mixture.py @@ -2,8 +2,20 @@ from numpy.testing import assert_allclose from .helpers import SeededTest -from pymc3 import Dirichlet, Gamma, Normal, Lognormal, Poisson, Exponential, \ - Mixture, NormalMixture, MvNormal, sample, Metropolis, Model +from pymc3 import ( + Dirichlet, + Gamma, + Normal, + Lognormal, + Poisson, + Exponential, + Mixture, + NormalMixture, + MvNormal, + sample, + Metropolis, + Model, +) import scipy.stats as st from scipy.special import logsumexp from pymc3.theanof import floatX @@ -30,188 +42,200 @@ def setup_class(cls): super(TestMixture, cls).setup_class() cls.norm_w = np.array([0.75, 0.25]) - cls.norm_mu = np.array([0., 5.]) + cls.norm_mu = np.array([0.0, 5.0]) cls.norm_sd = np.ones_like(cls.norm_mu) - cls.norm_x = generate_normal_mixture_data(cls.norm_w, cls.norm_mu, cls.norm_sd, size=1000) + cls.norm_x = generate_normal_mixture_data( + cls.norm_w, cls.norm_mu, cls.norm_sd, size=1000 + ) cls.pois_w = np.array([0.4, 0.6]) - cls.pois_mu = np.array([5., 20.]) + cls.pois_mu = np.array([5.0, 20.0]) cls.pois_x = generate_poisson_mixture_data(cls.pois_w, cls.pois_mu, size=1000) def test_mixture_list_of_normals(self): with Model() as model: - w = Dirichlet('w', floatX(np.ones_like(self.norm_w))) - mu = Normal('mu', 0., 10., shape=self.norm_w.size) - tau = Gamma('tau', 1., 1., shape=self.norm_w.size) - Mixture('x_obs', w, - [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])], - observed=self.norm_x) + w = Dirichlet("w", floatX(np.ones_like(self.norm_w))) + mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size) + tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size) + Mixture( + "x_obs", + w, + [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])], + observed=self.norm_x, + ) step = Metropolis() - trace = sample(5000, step, random_seed=self.random_seed, - progressbar=False, chains=1) + trace = sample( + 5000, step, random_seed=self.random_seed, progressbar=False, chains=1 + ) - assert_allclose(np.sort(trace['w'].mean(axis=0)), - np.sort(self.norm_w), - rtol=0.1, atol=0.1) - assert_allclose(np.sort(trace['mu'].mean(axis=0)), - np.sort(self.norm_mu), - rtol=0.1, atol=0.1) + assert_allclose( + np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w), rtol=0.1, atol=0.1 + ) + assert_allclose( + np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu), rtol=0.1, atol=0.1 + ) def test_normal_mixture(self): with Model() as model: - w = Dirichlet('w', floatX(np.ones_like(self.norm_w))) - mu = Normal('mu', 0., 10., shape=self.norm_w.size) - tau = Gamma('tau', 1., 1., shape=self.norm_w.size) - NormalMixture('x_obs', w, mu, tau=tau, observed=self.norm_x) + w = Dirichlet("w", floatX(np.ones_like(self.norm_w))) + mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size) + tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size) + NormalMixture("x_obs", w, mu, tau=tau, observed=self.norm_x) step = Metropolis() - trace = sample(5000, step, random_seed=self.random_seed, - progressbar=False, chains=1) + trace = sample( + 5000, step, random_seed=self.random_seed, progressbar=False, chains=1 + ) - assert_allclose(np.sort(trace['w'].mean(axis=0)), - np.sort(self.norm_w), - rtol=0.1, atol=0.1) - assert_allclose(np.sort(trace['mu'].mean(axis=0)), - np.sort(self.norm_mu), - rtol=0.1, atol=0.1) + assert_allclose( + np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w), rtol=0.1, atol=0.1 + ) + assert_allclose( + np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu), rtol=0.1, atol=0.1 + ) def test_normal_mixture_nd(self): nd, ncomp = 3, 5 with Model() as model0: - mus = Normal('mus', shape=(nd, ncomp)) - taus = Gamma('taus', alpha=1, beta=1, shape=(nd, ncomp)) - ws = Dirichlet('ws', np.ones(ncomp)) - mixture0 = NormalMixture('m', w=ws, mu=mus, tau=taus, shape=nd) + mus = Normal("mus", shape=(nd, ncomp)) + taus = Gamma("taus", alpha=1, beta=1, shape=(nd, ncomp)) + ws = Dirichlet("ws", np.ones(ncomp)) + mixture0 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd) with Model() as model1: - mus = Normal('mus', shape=(nd, ncomp)) - taus = Gamma('taus', alpha=1, beta=1, shape=(nd, ncomp)) - ws = Dirichlet('ws', np.ones(ncomp)) - comp_dist = [Normal.dist(mu=mus[:, i], tau=taus[:, i]) - for i in range(ncomp)] - mixture1 = Mixture('m', w=ws, comp_dists=comp_dist, shape=nd) + mus = Normal("mus", shape=(nd, ncomp)) + taus = Gamma("taus", alpha=1, beta=1, shape=(nd, ncomp)) + ws = Dirichlet("ws", np.ones(ncomp)) + comp_dist = [ + Normal.dist(mu=mus[:, i], tau=taus[:, i]) for i in range(ncomp) + ] + mixture1 = Mixture("m", w=ws, comp_dists=comp_dist, shape=nd) testpoint = model0.test_point - testpoint['mus'] = np.random.randn(nd, ncomp) + testpoint["mus"] = np.random.randn(nd, ncomp) assert_allclose(model0.logp(testpoint), model1.logp(testpoint)) assert_allclose(mixture0.logp(testpoint), mixture1.logp(testpoint)) def test_poisson_mixture(self): with Model() as model: - w = Dirichlet('w', floatX(np.ones_like(self.pois_w))) - mu = Gamma('mu', 1., 1., shape=self.pois_w.size) - Mixture('x_obs', w, Poisson.dist(mu), observed=self.pois_x) + w = Dirichlet("w", floatX(np.ones_like(self.pois_w))) + mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size) + Mixture("x_obs", w, Poisson.dist(mu), observed=self.pois_x) step = Metropolis() - trace = sample(5000, step, random_seed=self.random_seed, - progressbar=False, chains=1) + trace = sample( + 5000, step, random_seed=self.random_seed, progressbar=False, chains=1 + ) - assert_allclose(np.sort(trace['w'].mean(axis=0)), - np.sort(self.pois_w), - rtol=0.1, atol=0.1) - assert_allclose(np.sort(trace['mu'].mean(axis=0)), - np.sort(self.pois_mu), - rtol=0.1, atol=0.1) + assert_allclose( + np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1 + ) + assert_allclose( + np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1 + ) def test_mixture_list_of_poissons(self): with Model() as model: - w = Dirichlet('w', floatX(np.ones_like(self.pois_w))) - mu = Gamma('mu', 1., 1., shape=self.pois_w.size) - Mixture('x_obs', w, - [Poisson.dist(mu[0]), Poisson.dist(mu[1])], - observed=self.pois_x) + w = Dirichlet("w", floatX(np.ones_like(self.pois_w))) + mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size) + Mixture( + "x_obs", + w, + [Poisson.dist(mu[0]), Poisson.dist(mu[1])], + observed=self.pois_x, + ) step = Metropolis() - trace = sample(5000, step, random_seed=self.random_seed, - progressbar=False, chains=1) + trace = sample( + 5000, step, random_seed=self.random_seed, progressbar=False, chains=1 + ) - assert_allclose(np.sort(trace['w'].mean(axis=0)), - np.sort(self.pois_w), - rtol=0.1, atol=0.1) - assert_allclose(np.sort(trace['mu'].mean(axis=0)), - np.sort(self.pois_mu), - rtol=0.1, atol=0.1) + assert_allclose( + np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1 + ) + assert_allclose( + np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1 + ) def test_mixture_of_mvn(self): - mu1 = np.asarray([0., 1.]) + mu1 = np.asarray([0.0, 1.0]) cov1 = np.diag([1.5, 2.5]) - mu2 = np.asarray([1., 0.]) + mu2 = np.asarray([1.0, 0.0]) cov2 = np.diag([2.5, 3.5]) - obs = np.asarray([[.5, .5], mu1, mu2]) + obs = np.asarray([[0.5, 0.5], mu1, mu2]) with Model() as model: - w = Dirichlet('w', floatX(np.ones(2)), transform=None) + w = Dirichlet("w", floatX(np.ones(2)), transform=None) mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1) mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2) - y = Mixture('x_obs', w, [mvncomp1, mvncomp2], - observed=obs) + y = Mixture("x_obs", w, [mvncomp1, mvncomp2], observed=obs) # check logp of each component - complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1), - st.multivariate_normal.logpdf(obs, mu2, cov2)) - ).T + complogp_st = np.vstack( + ( + st.multivariate_normal.logpdf(obs, mu1, cov1), + st.multivariate_normal.logpdf(obs, mu2, cov2), + ) + ).T complogp = y.distribution._comp_logp(theano.shared(obs)).eval() assert_allclose(complogp, complogp_st) # check logp of mixture testpoint = model.test_point - mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st, - axis=-1, keepdims=True) - assert_allclose(y.logp_elemwise(testpoint), - mixlogp_st) + mixlogp_st = logsumexp( + np.log(testpoint["w"]) + complogp_st, axis=-1, keepdims=True + ) + assert_allclose(y.logp_elemwise(testpoint), mixlogp_st) # check logp of model - priorlogp = st.dirichlet.logpdf(x=testpoint['w'], - alpha=np.ones(2), - ) - assert_allclose(model.logp(testpoint), - mixlogp_st.sum() + priorlogp) + priorlogp = st.dirichlet.logpdf(x=testpoint["w"], alpha=np.ones(2)) + assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp) def test_mixture_of_mixture(self): nbr = 4 with Model() as model: # mixtures components g_comp = Normal.dist( - mu=Exponential('mu_g', lam=1.0, shape=nbr, transform=None), + mu=Exponential("mu_g", lam=1.0, shape=nbr, transform=None), sd=1, - shape=nbr) + shape=nbr, + ) l_comp = Lognormal.dist( - mu=Exponential('mu_l', lam=1.0, shape=nbr, transform=None), + mu=Exponential("mu_l", lam=1.0, shape=nbr, transform=None), sd=1, - shape=nbr) + shape=nbr, + ) # weight vector for the mixtures - g_w = Dirichlet('g_w', a=floatX(np.ones(nbr)*0.0000001), transform=None) - l_w = Dirichlet('l_w', a=floatX(np.ones(nbr)*0.0000001), transform=None) + g_w = Dirichlet("g_w", a=floatX(np.ones(nbr) * 0.0000001), transform=None) + l_w = Dirichlet("l_w", a=floatX(np.ones(nbr) * 0.0000001), transform=None) # mixture components g_mix = Mixture.dist(w=g_w, comp_dists=g_comp) l_mix = Mixture.dist(w=l_w, comp_dists=l_comp) # mixture of mixtures - mix_w = Dirichlet('mix_w', a=floatX(np.ones(2)), transform=None) - mix = Mixture('mix', w=mix_w, - comp_dists=[g_mix, l_mix], - observed=np.exp(self.norm_x)) + mix_w = Dirichlet("mix_w", a=floatX(np.ones(2)), transform=None) + mix = Mixture( + "mix", w=mix_w, comp_dists=[g_mix, l_mix], observed=np.exp(self.norm_x) + ) test_point = model.test_point def mixmixlogp(value, point): - priorlogp = st.dirichlet.logpdf(x=point['g_w'], - alpha=np.ones(nbr)*0.0000001, - ) + \ - st.expon.logpdf(x=point['mu_g']).sum() + \ - st.dirichlet.logpdf(x=point['l_w'], - alpha=np.ones(nbr)*0.0000001, - ) + \ - st.expon.logpdf(x=point['mu_l']).sum() + \ - st.dirichlet.logpdf(x=point['mix_w'], - alpha=np.ones(2), - ) - complogp1 = st.norm.logpdf(x=value, - loc=point['mu_g']) - mixlogp1 = logsumexp(np.log(point['g_w']) + complogp1, - axis=-1, keepdims=True) - complogp2 = st.lognorm.logpdf(value, 1., 0., np.exp(point['mu_l'])) - mixlogp2 = logsumexp(np.log(point['l_w']) + complogp2, - axis=-1, keepdims=True) + priorlogp = ( + st.dirichlet.logpdf(x=point["g_w"], alpha=np.ones(nbr) * 0.0000001) + + st.expon.logpdf(x=point["mu_g"]).sum() + + st.dirichlet.logpdf(x=point["l_w"], alpha=np.ones(nbr) * 0.0000001) + + st.expon.logpdf(x=point["mu_l"]).sum() + + st.dirichlet.logpdf(x=point["mix_w"], alpha=np.ones(2)) + ) + complogp1 = st.norm.logpdf(x=value, loc=point["mu_g"]) + mixlogp1 = logsumexp( + np.log(point["g_w"]) + complogp1, axis=-1, keepdims=True + ) + complogp2 = st.lognorm.logpdf(value, 1.0, 0.0, np.exp(point["mu_l"])) + mixlogp2 = logsumexp( + np.log(point["l_w"]) + complogp2, axis=-1, keepdims=True + ) complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1) - mixmixlogpg = logsumexp(np.log(point['mix_w']) + complogp_mix, - axis=-1, keepdims=True) + mixmixlogpg = logsumexp( + np.log(point["mix_w"]) + complogp_mix, axis=-1, keepdims=True + ) return priorlogp, mixmixlogpg value = np.exp(self.norm_x)[:, None] @@ -221,13 +245,11 @@ def mixmixlogp(value, point): assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point)) # check model logp - assert_allclose(priorlogp + mixmixlogpg.sum(), - model.logp(test_point)) + assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point)) # check input and check logp again - test_point['g_w'] = np.asarray([.1, .1, .2, .6]) - test_point['mu_g'] = np.exp(np.random.randn(nbr)) + test_point["g_w"] = np.asarray([0.1, 0.1, 0.2, 0.6]) + test_point["mu_g"] = np.exp(np.random.randn(nbr)) priorlogp, mixmixlogpg = mixmixlogp(value, test_point) assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point)) - assert_allclose(priorlogp + mixmixlogpg.sum(), - model.logp(test_point)) + assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point)) diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index 473ff3a2a4..e7163415d1 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -12,41 +12,41 @@ class NewModel(pm.Model): - def __init__(self, name='', model=None): + def __init__(self, name="", model=None): super(NewModel, self).__init__(name, model) assert pm.modelcontext(None) is self # 1) init variables with Var method - self.Var('v1', pm.Normal.dist()) - self.v2 = pm.Normal('v2', mu=0, sd=1) + self.Var("v1", pm.Normal.dist()) + self.v2 = pm.Normal("v2", mu=0, sd=1) # 2) Potentials and Deterministic variables with method too # be sure that names will not overlap with other same models - pm.Deterministic('d', tt.constant(1)) - pm.Potential('p', tt.constant(1)) + pm.Deterministic("d", tt.constant(1)) + pm.Potential("p", tt.constant(1)) class DocstringModel(pm.Model): - def __init__(self, mean=0, sd=1, name='', model=None): + def __init__(self, mean=0, sd=1, name="", model=None): super(DocstringModel, self).__init__(name, model) - self.Var('v1', Normal.dist(mu=mean, sd=sd)) - Normal('v2', mu=mean, sd=sd) - Normal('v3', mu=mean, sd=HalfCauchy('sd', beta=10, testval=1.)) - Deterministic('v3_sq', self.v3 ** 2) - Potential('p1', tt.constant(1)) + self.Var("v1", Normal.dist(mu=mean, sd=sd)) + Normal("v2", mu=mean, sd=sd) + Normal("v3", mu=mean, sd=HalfCauchy("sd", beta=10, testval=1.0)) + Deterministic("v3_sq", self.v3 ** 2) + Potential("p1", tt.constant(1)) class TestBaseModel(object): def test_setattr_properly_works(self): with pm.Model() as model: - pm.Normal('v1') + pm.Normal("v1") assert len(model.vars) == 1 - with pm.Model('sub') as submodel: - submodel.Var('v1', pm.Normal.dist()) - assert hasattr(submodel, 'v1') + with pm.Model("sub") as submodel: + submodel.Var("v1", pm.Normal.dist()) + assert hasattr(submodel, "v1") assert len(submodel.vars) == 1 assert len(model.vars) == 2 with submodel: - submodel.Var('v2', pm.Normal.dist()) - assert hasattr(submodel, 'v2') + submodel.Var("v2", pm.Normal.dist()) + assert hasattr(submodel, "v2") assert len(submodel.vars) == 2 assert len(model.vars) == 3 @@ -55,26 +55,26 @@ def test_context_passes_vars_to_parent_model(self): # a set of variables is created NewModel() # another set of variables are created but with prefix 'another' - usermodel2 = NewModel(name='another') + usermodel2 = NewModel(name="another") # you can enter in a context with submodel with usermodel2: - usermodel2.Var('v3', pm.Normal.dist()) - pm.Normal('v4') + usermodel2.Var("v3", pm.Normal.dist()) + pm.Normal("v4") # this variable is created in parent model too - assert 'another_v2' in model.named_vars - assert 'another_v3' in model.named_vars - assert 'another_v3' in usermodel2.named_vars - assert 'another_v4' in model.named_vars - assert 'another_v4' in usermodel2.named_vars - assert hasattr(usermodel2, 'v3') - assert hasattr(usermodel2, 'v2') - assert hasattr(usermodel2, 'v4') + assert "another_v2" in model.named_vars + assert "another_v3" in model.named_vars + assert "another_v3" in usermodel2.named_vars + assert "another_v4" in model.named_vars + assert "another_v4" in usermodel2.named_vars + assert hasattr(usermodel2, "v3") + assert hasattr(usermodel2, "v2") + assert hasattr(usermodel2, "v4") # When you create a class based model you should follow some rules with model: - m = NewModel('one_more') - assert m.d is model['one_more_d'] - assert m['d'] is model['one_more_d'] - assert m['one_more_d'] is model['one_more_d'] + m = NewModel("one_more") + assert m.d is model["one_more_d"] + assert m["d"] is model["one_more_d"] + assert m["one_more_d"] is model["one_more_d"] class TestNested(object): @@ -84,37 +84,37 @@ def test_nest_context_works(self): with new: assert pm.modelcontext(None) is new assert pm.modelcontext(None) is m - assert 'v1' in m.named_vars - assert 'v2' in m.named_vars + assert "v1" in m.named_vars + assert "v2" in m.named_vars def test_named_context(self): with pm.Model() as m: - NewModel(name='new') - assert 'new_v1' in m.named_vars - assert 'new_v2' in m.named_vars + NewModel(name="new") + assert "new_v1" in m.named_vars + assert "new_v2" in m.named_vars def test_docstring_example1(self): usage1 = DocstringModel() - assert 'v1' in usage1.named_vars - assert 'v2' in usage1.named_vars - assert 'v3' in usage1.named_vars - assert 'v3_sq' in usage1.named_vars + assert "v1" in usage1.named_vars + assert "v2" in usage1.named_vars + assert "v3" in usage1.named_vars + assert "v3_sq" in usage1.named_vars assert len(usage1.potentials), 1 def test_docstring_example2(self): with pm.Model() as model: - DocstringModel(name='prefix') - assert 'prefix_v1' in model.named_vars - assert 'prefix_v2' in model.named_vars - assert 'prefix_v3' in model.named_vars - assert 'prefix_v3_sq' in model.named_vars + DocstringModel(name="prefix") + assert "prefix_v1" in model.named_vars + assert "prefix_v2" in model.named_vars + assert "prefix_v3" in model.named_vars + assert "prefix_v3_sq" in model.named_vars assert len(model.potentials), 1 def test_duplicates_detection(self): with pm.Model(): - DocstringModel(name='prefix') + DocstringModel(name="prefix") with pytest.raises(ValueError): - DocstringModel(name='prefix') + DocstringModel(name="prefix") def test_model_root(self): with pm.Model() as model: @@ -127,15 +127,15 @@ class TestObserved(object): def test_observed_rv_fail(self): with pytest.raises(TypeError): with pm.Model(): - x = Normal('x') - Normal('n', observed=x) + x = Normal("x") + Normal("n", observed=x) def test_observed_type(self): X_ = np.random.randn(100, 5) X = pm.floatX(theano.shared(X_)) with pm.Model(): - x1 = pm.Normal('x1', observed=X_) - x2 = pm.Normal('x2', observed=X) + x1 = pm.Normal("x1", observed=X_) + x2 = pm.Normal("x2", observed=X) assert x1.type == X.type assert x2.type == X.type @@ -143,89 +143,89 @@ def test_observed_type(self): class TestTheanoConfig(object): def test_set_testval_raise(self): - with theano.configparser.change_flags(compute_test_value='off'): + with theano.configparser.change_flags(compute_test_value="off"): with pm.Model(): - assert theano.config.compute_test_value == 'raise' - assert theano.config.compute_test_value == 'off' + assert theano.config.compute_test_value == "raise" + assert theano.config.compute_test_value == "off" def test_nested(self): - with theano.configparser.change_flags(compute_test_value='off'): - with pm.Model(theano_config={'compute_test_value': 'ignore'}): - assert theano.config.compute_test_value == 'ignore' - with pm.Model(theano_config={'compute_test_value': 'warn'}): - assert theano.config.compute_test_value == 'warn' - assert theano.config.compute_test_value == 'ignore' - assert theano.config.compute_test_value == 'off' + with theano.configparser.change_flags(compute_test_value="off"): + with pm.Model(theano_config={"compute_test_value": "ignore"}): + assert theano.config.compute_test_value == "ignore" + with pm.Model(theano_config={"compute_test_value": "warn"}): + assert theano.config.compute_test_value == "warn" + assert theano.config.compute_test_value == "ignore" + assert theano.config.compute_test_value == "off" def test_duplicate_vars(): with pytest.raises(ValueError) as err: with pm.Model(): - pm.Normal('a') - pm.Normal('a') - err.match('already exists') + pm.Normal("a") + pm.Normal("a") + err.match("already exists") with pytest.raises(ValueError) as err: with pm.Model(): - pm.Normal('a') - pm.Normal('a', transform=transforms.log) - err.match('already exists') + pm.Normal("a") + pm.Normal("a", transform=transforms.log) + err.match("already exists") with pytest.raises(ValueError) as err: with pm.Model(): - a = pm.Normal('a') - pm.Potential('a', a**2) - err.match('already exists') + a = pm.Normal("a") + pm.Potential("a", a ** 2) + err.match("already exists") with pytest.raises(ValueError) as err: with pm.Model(): - pm.Binomial('a', 10, .5) - pm.Normal('a', transform=transforms.log) - err.match('already exists') + pm.Binomial("a", 10, 0.5) + pm.Normal("a", transform=transforms.log) + err.match("already exists") def test_empty_observed(): data = pd.DataFrame(np.ones((2, 3)) / 3) data.values[:] = np.nan with pm.Model(): - a = pm.Normal('a', observed=data) + a = pm.Normal("a", observed=data) npt.assert_allclose(a.tag.test_value, np.zeros((2, 3))) - b = pm.Beta('b', alpha=1, beta=1, observed=data) + b = pm.Beta("b", alpha=1, beta=1, observed=data) npt.assert_allclose(b.tag.test_value, np.ones((2, 3)) / 2) class TestValueGradFunction(unittest.TestCase): def test_no_extra(self): - a = tt.vector('a') + a = tt.vector("a") a.tag.test_value = np.zeros(3, dtype=a.dtype) a.dshape = (3,) a.dsize = 3 - f_grad = ValueGradFunction(a.sum(), [a], [], mode='FAST_COMPILE') + f_grad = ValueGradFunction(a.sum(), [a], [], mode="FAST_COMPILE") assert f_grad.size == 3 def test_invalid_type(self): - a = tt.ivector('a') + a = tt.ivector("a") a.tag.test_value = np.zeros(3, dtype=a.dtype) a.dshape = (3,) a.dsize = 3 with pytest.raises(TypeError) as err: - ValueGradFunction(a.sum(), [a], [], mode='FAST_COMPILE') - err.match('Invalid dtype') + ValueGradFunction(a.sum(), [a], [], mode="FAST_COMPILE") + err.match("Invalid dtype") def setUp(self): - extra1 = tt.iscalar('extra1') + extra1 = tt.iscalar("extra1") extra1_ = np.array(0, dtype=extra1.dtype) extra1.tag.test_value = extra1_ extra1.dshape = tuple() extra1.dsize = 1 - val1 = tt.vector('val1') + val1 = tt.vector("val1") val1_ = np.zeros(3, dtype=val1.dtype) val1.tag.test_value = val1_ val1.dshape = (3,) val1.dsize = 3 - val2 = tt.matrix('val2') + val2 = tt.matrix("val2") val2_ = np.zeros((2, 3), dtype=val2.dtype) val2.tag.test_value = val2_ val2.dshape = (2, 3) @@ -238,47 +238,47 @@ def setUp(self): self.cost = extra1 * val1.sum() + val2.sum() self.f_grad = ValueGradFunction( - self.cost, [val1, val2], [extra1], mode='FAST_COMPILE') + self.cost, [val1, val2], [extra1], mode="FAST_COMPILE" + ) def test_extra_not_set(self): with pytest.raises(ValueError) as err: self.f_grad.get_extra_values() - err.match('Extra values are not set') + err.match("Extra values are not set") with pytest.raises(ValueError) as err: self.f_grad(np.zeros(self.f_grad.size, dtype=self.f_grad.dtype)) - err.match('Extra values are not set') + err.match("Extra values are not set") def test_grad(self): - self.f_grad.set_extra_values({'extra1': 5}) + self.f_grad.set_extra_values({"extra1": 5}) array = np.ones(self.f_grad.size, dtype=self.f_grad.dtype) val, grad = self.f_grad(array) assert val == 21 npt.assert_allclose(grad, [5, 5, 5, 1, 1, 1, 1, 1, 1]) def test_bij(self): - self.f_grad.set_extra_values({'extra1': 5}) + self.f_grad.set_extra_values({"extra1": 5}) array = np.ones(self.f_grad.size, dtype=self.f_grad.dtype) point = self.f_grad.array_to_dict(array) assert len(point) == 2 - npt.assert_allclose(point['val1'], 1) - npt.assert_allclose(point['val2'], 1) + npt.assert_allclose(point["val1"], 1) + npt.assert_allclose(point["val2"], 1) array2 = self.f_grad.dict_to_array(point) npt.assert_allclose(array2, array) point_ = self.f_grad.array_to_full_dict(array) assert len(point_) == 3 - assert point_['extra1'] == 5 + assert point_["extra1"] == 5 def test_edge_case(self): # Edge case discovered in #2948 ndim = 3 with pm.Model() as m: - pm.Lognormal('sigma', - mu=np.zeros(ndim), - tau=np.ones(ndim), - shape=ndim) # variance for the correlation matrix - pm.HalfCauchy('nu', beta=10) + pm.Lognormal( + "sigma", mu=np.zeros(ndim), tau=np.ones(ndim), shape=ndim + ) # variance for the correlation matrix + pm.HalfCauchy("nu", beta=10) step = pm.NUTS() func = step._logp_dlogp_func @@ -287,4 +287,4 @@ def test_edge_case(self): logp, dlogp = func(q) assert logp.size == 1 assert dlogp.size == 4 - npt.assert_allclose(dlogp, 0., atol=1e-5) + npt.assert_allclose(dlogp, 0.0, atol=1e-5) diff --git a/pymc3/tests/test_model_func.py b/pymc3/tests/test_model_func.py index 95441b1b63..9f60885e0d 100644 --- a/pymc3/tests/test_model_func.py +++ b/pymc3/tests/test_model_func.py @@ -5,42 +5,43 @@ from .models import simple_model, mv_simple -tol = 2.0**-11 +tol = 2.0 ** -11 + def test_logp(): start, model, (mu, sig) = simple_model() lp = model.fastlogp lp(start) - close_to(lp(start), sp.norm.logpdf(start['x'], mu, sig).sum(), tol) + close_to(lp(start), sp.norm.logpdf(start["x"], mu, sig).sum(), tol) def test_dlogp(): start, model, (mu, sig) = simple_model() dlogp = model.fastdlogp() - close_to(dlogp(start), -(start['x'] - mu) / sig**2, 1. / sig**2 / 100.) + close_to(dlogp(start), -(start["x"] - mu) / sig ** 2, 1.0 / sig ** 2 / 100.0) def test_dlogp2(): start, model, (_, sig) = mv_simple() H = np.linalg.inv(sig) d2logp = model.fastd2logp() - close_to(d2logp(start), H, np.abs(H / 100.)) + close_to(d2logp(start), H, np.abs(H / 100.0)) def test_deterministic(): with pm.Model() as model: - x = pm.Normal('x', 0, 1) - y = pm.Deterministic('y', x**2) + x = pm.Normal("x", 0, 1) + y = pm.Deterministic("y", x ** 2) assert model.y == y - assert model['y'] == y + assert model["y"] == y def test_mapping(): with pm.Model() as model: - mu = pm.Normal('mu', 0, 1) - sd = pm.Gamma('sd', 1, 1) - y = pm.Normal('y', mu, sd, observed=np.array([.1, .5])) + mu = pm.Normal("mu", 0, 1) + sd = pm.Gamma("sd", 1, 1) + y = pm.Normal("y", mu, sd, observed=np.array([0.1, 0.5])) lp = model.fastlogp lparray = model.logp_array point = model.test_point @@ -50,6 +51,3 @@ def test_mapping(): randarray = np.random.randn(*parray.shape) randpoint = model.bijection.rmap(randarray) assert lp(randpoint) == lparray(randarray) - - - diff --git a/pymc3/tests/test_model_graph.py b/pymc3/tests/test_model_graph.py index c261410cc9..70531ecb8e 100644 --- a/pymc3/tests/test_model_graph.py +++ b/pymc3/tests/test_model_graph.py @@ -9,42 +9,39 @@ def radon_model(): """Similar in shape to the Radon model""" n_homes = 919 counties = 85 - uranium = np.random.normal(-.1, 0.4, size=n_homes) + uranium = np.random.normal(-0.1, 0.4, size=n_homes) xbar = np.random.normal(1, 0.1, size=n_homes) floor_measure = np.random.randint(0, 2, size=n_homes) log_radon = np.random.normal(1, 1, size=n_homes) d, r = divmod(919, 85) - county = np.hstack(( - np.tile(np.arange(counties, dtype=int), d), - np.arange(r) - )) + county = np.hstack((np.tile(np.arange(counties, dtype=int), d), np.arange(r))) with pm.Model() as model: - sigma_a = pm.HalfCauchy('sigma_a', 5) - gamma = pm.Normal('gamma', mu=0., sd=1e5, shape=3) - mu_a = pm.Deterministic('mu_a', gamma[0] + gamma[1]*uranium + gamma[2]*xbar) - eps_a = pm.Normal('eps_a', mu=0, sd=sigma_a, shape=counties) - a = pm.Deterministic('a', mu_a + eps_a[county]) - b = pm.Normal('b', mu=0., sd=1e15) - sigma_y = pm.Uniform('sigma_y', lower=0, upper=100) + sigma_a = pm.HalfCauchy("sigma_a", 5) + gamma = pm.Normal("gamma", mu=0.0, sd=1e5, shape=3) + mu_a = pm.Deterministic("mu_a", gamma[0] + gamma[1] * uranium + gamma[2] * xbar) + eps_a = pm.Normal("eps_a", mu=0, sd=sigma_a, shape=counties) + a = pm.Deterministic("a", mu_a + eps_a[county]) + b = pm.Normal("b", mu=0.0, sd=1e15) + sigma_y = pm.Uniform("sigma_y", lower=0, upper=100) y_hat = a + b * floor_measure - y_like = pm.Normal('y_like', mu=y_hat, sd=sigma_y, observed=log_radon) + y_like = pm.Normal("y_like", mu=y_hat, sd=sigma_y, observed=log_radon) compute_graph = { - 'sigma_a': set(), - 'gamma': set(), - 'mu_a': {'gamma'}, - 'eps_a': {'sigma_a'}, - 'a': {'mu_a', 'eps_a'}, - 'b': set(), - 'sigma_y': set(), - 'y_like': {'a', 'b', 'sigma_y'} + "sigma_a": set(), + "gamma": set(), + "mu_a": {"gamma"}, + "eps_a": {"sigma_a"}, + "a": {"mu_a", "eps_a"}, + "b": set(), + "sigma_y": set(), + "y_like": {"a", "b", "sigma_y"}, } plates = { - (): {'b', 'sigma_a', 'sigma_y'}, - (3,): {'gamma'}, - (85,): {'eps_a'}, - (919,): {'a', 'mu_a', 'y_like'}, + (): {"b", "sigma_a", "sigma_y"}, + (3,): {"gamma"}, + (85,): {"eps_a"}, + (919,): {"a", "mu_a", "y_like"}, } return model, compute_graph, plates @@ -76,4 +73,3 @@ def test_graphviz(self): g = model_to_graphviz(self.model) for key in self.compute_graph: assert key in g.source - diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py index 2b191bd144..2c1023a4a0 100644 --- a/pymc3/tests/test_model_helpers.py +++ b/pymc3/tests/test_model_helpers.py @@ -20,21 +20,20 @@ def test_pandas_to_array(self): sparse_input = sps.csr_matrix(np.eye(3)) dense_input = np.arange(9).reshape((3, 3)) - input_name = 'input_variable' + input_name = "input_variable" theano_graph_input = tt.as_tensor(dense_input, name=input_name) pandas_input = pd.DataFrame(dense_input) # All the even numbers are replaced with NaN - missing_pandas_input = pd.DataFrame(np.array([[np.nan, 1, np.nan], - [3, np.nan, 5], - [np.nan, 7, np.nan]])) - masked_array_input = ma.array(dense_input, - mask=(np.mod(dense_input, 2) == 0)) + missing_pandas_input = pd.DataFrame( + np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]]) + ) + masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0)) # Create a generator object. Apparently the generator object needs to # yield numpy arrays. - square_generator = (np.array([i**2], dtype=int) for i in range(100)) + square_generator = (np.array([i ** 2], dtype=int) for i in range(100)) # Alias the function to be tested func = pm.model.pandas_to_array @@ -54,8 +53,7 @@ def test_pandas_to_array(self): sparse_output = func(sparse_input) assert sps.issparse(sparse_output) assert sparse_output.shape == sparse_input.shape - npt.assert_allclose(sparse_output.toarray(), - sparse_input.toarray()) + npt.assert_allclose(sparse_output.toarray(), sparse_input.toarray()) # Check function behavior when using masked array inputs and pandas # objects with missing data @@ -89,11 +87,10 @@ def test_as_tensor(self): should return a Sparse Theano object. """ # Create the various inputs to the function - input_name = 'testing_inputs' + input_name = "testing_inputs" sparse_input = sps.csr_matrix(np.eye(3)) dense_input = np.arange(9).reshape((3, 3)) - masked_array_input = ma.array(dense_input, - mask=(np.mod(dense_input, 2) == 0)) + masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0)) # Create a fake model and fake distribution to be used for the test fake_model = pm.Model() @@ -106,18 +103,11 @@ def test_as_tensor(self): func = pm.model.as_tensor # Check function behavior using the various inputs - dense_output = func(dense_input, - input_name, - fake_model, - fake_distribution) - sparse_output = func(sparse_input, - input_name, - fake_model, - fake_distribution) - masked_output = func(masked_array_input, - input_name, - fake_model, - fake_distribution) + dense_output = func(dense_input, input_name, fake_model, fake_distribution) + sparse_output = func(sparse_input, input_name, fake_model, fake_distribution) + masked_output = func( + masked_array_input, input_name, fake_model, fake_distribution + ) # Ensure that the missing values are appropriately set to None for func_output in [dense_output, sparse_output]: diff --git a/pymc3/tests/test_modelcontext.py b/pymc3/tests/test_modelcontext.py index d3073daf84..86c7ffcff8 100644 --- a/pymc3/tests/test_modelcontext.py +++ b/pymc3/tests/test_modelcontext.py @@ -12,21 +12,24 @@ def test_thread_safety(self): that thread A enters the context manager first, then B, then A attempts to declare a variable while B is still in the context manager. """ - aInCtxt,bInCtxt,aDone = [threading.Event() for _ in range(3)] + aInCtxt, bInCtxt, aDone = [threading.Event() for _ in range(3)] modelA = Model() modelB = Model() + def make_model_a(): with modelA: aInCtxt.set() bInCtxt.wait() - Normal('a',0,1) + Normal("a", 0, 1) aDone.set() + def make_model_b(): aInCtxt.wait() with modelB: bInCtxt.set() aDone.wait() - Normal('b', 0, 1) + Normal("b", 0, 1) + threadA = threading.Thread(target=make_model_a) threadB = threading.Thread(target=make_model_b) threadA.start() @@ -38,7 +41,4 @@ def make_model_b(): # - B enters it's model context after A, but before a is declared -> a goes into B # - A leaves it's model context before B attempts to declare b. A's context manager # takes B from the stack, such that b ends up in model A - assert ( - list(modelA.named_vars), - list(modelB.named_vars), - ) == (['a'],['b']) + assert (list(modelA.named_vars), list(modelB.named_vars)) == (["a"], ["b"]) diff --git a/pymc3/tests/test_models_linear.py b/pymc3/tests/test_models_linear.py index bc8c0ee0d2..352abaaaf2 100644 --- a/pymc3/tests/test_models_linear.py +++ b/pymc3/tests/test_models_linear.py @@ -18,7 +18,7 @@ def setup_class(cls): super(TestGLM, cls).setup_class() cls.intercept = 1 cls.slope = 3 - cls.sd = .05 + cls.sd = 0.05 x_linear, cls.y_linear = generate_data(cls.intercept, cls.slope, size=1000) cls.y_linear += np.random.normal(size=1000, scale=cls.sd) cls.data_linear = dict(x=x_linear, y=cls.y_linear) @@ -29,86 +29,99 @@ def setup_class(cls): cls.data_logistic = dict(x=x_logistic, y=bern_trials) def test_linear_component(self): - vars_to_create = { - 'sigma', - 'sigma_interval__', - 'y_obs', - 'lm_x0', - 'lm_Intercept' - } + vars_to_create = {"sigma", "sigma_interval__", "y_obs", "lm_x0", "lm_Intercept"} with Model() as model: lm = LinearComponent( - self.data_linear['x'], - self.data_linear['y'], - name='lm' - ) # yields lm_x0, lm_Intercept - sigma = Uniform('sigma', 0, 20) # yields sigma_interval__ - Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear) # yields y_obs + self.data_linear["x"], self.data_linear["y"], name="lm" + ) # yields lm_x0, lm_Intercept + sigma = Uniform("sigma", 0, 20) # yields sigma_interval__ + Normal( + "y_obs", mu=lm.y_est, sd=sigma, observed=self.y_linear + ) # yields y_obs start = find_MAP(vars=[sigma]) step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, random_seed=self.random_seed) + trace = sample( + 500, + tune=0, + step=step, + start=start, + progressbar=False, + random_seed=self.random_seed, + ) - assert round(abs(np.mean(trace['lm_Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['lm_x0'])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0 + assert round(abs(np.mean(trace["lm_Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["lm_x0"]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["sigma"]) - self.sd), 1) == 0 assert vars_to_create == set(model.named_vars.keys()) def test_linear_component_from_formula(self): with Model() as model: - lm = LinearComponent.from_formula('y ~ x', self.data_linear) - sigma = Uniform('sigma', 0, 20) - Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear) + lm = LinearComponent.from_formula("y ~ x", self.data_linear) + sigma = Uniform("sigma", 0, 20) + Normal("y_obs", mu=lm.y_est, sd=sigma, observed=self.y_linear) start = find_MAP(vars=[sigma]) step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, - random_seed=self.random_seed) + trace = sample( + 500, + tune=0, + step=step, + start=start, + progressbar=False, + random_seed=self.random_seed, + ) - assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0 + assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["sigma"]) - self.sd), 1) == 0 def test_glm(self): with Model() as model: vars_to_create = { - 'glm_sd', - 'glm_sd_log__', - 'glm_y', - 'glm_x0', - 'glm_Intercept' + "glm_sd", + "glm_sd_log__", + "glm_y", + "glm_x0", + "glm_Intercept", } - GLM( - self.data_linear['x'], - self.data_linear['y'], - name='glm' - ) + GLM(self.data_linear["x"], self.data_linear["y"], name="glm") start = find_MAP() step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, random_seed=self.random_seed) - assert round(abs(np.mean(trace['glm_Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['glm_x0'])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['glm_sd'])-self.sd), 1) == 0 + trace = sample( + 500, + tune=0, + step=step, + start=start, + progressbar=False, + random_seed=self.random_seed, + ) + assert round(abs(np.mean(trace["glm_Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["glm_x0"]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["glm_sd"]) - self.sd), 1) == 0 assert vars_to_create == set(model.named_vars.keys()) def test_glm_from_formula(self): with Model() as model: - NAME = 'glm' - GLM.from_formula('y ~ x', self.data_linear, name=NAME) + NAME = "glm" + GLM.from_formula("y ~ x", self.data_linear, name=NAME) start = find_MAP() step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, random_seed=self.random_seed) + trace = sample( + 500, + tune=0, + step=step, + start=start, + progressbar=False, + random_seed=self.random_seed, + ) - assert round(abs(np.mean(trace['%s_Intercept' % NAME])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['%s_x' % NAME])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['%s_sd' % NAME])-self.sd), 1) == 0 + assert ( + round(abs(np.mean(trace["%s_Intercept" % NAME]) - self.intercept), 1) + == 0 + ) + assert round(abs(np.mean(trace["%s_x" % NAME]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["%s_sd" % NAME]) - self.sd), 1) == 0 def test_strange_types(self): with Model(): - with pytest.raises( - ValueError): - GLM(1, - self.data_linear['y'], - name='lm') + with pytest.raises(ValueError): + GLM(1, self.data_linear["y"], name="lm") diff --git a/pymc3/tests/test_models_utils.py b/pymc3/tests/test_models_utils.py index d32aea7ec4..7168b0692e 100644 --- a/pymc3/tests/test_models_utils.py +++ b/pymc3/tests/test_models_utils.py @@ -11,63 +11,64 @@ def setup_method(self): def assertMatrixLabels(self, m, l, mt=None, lt=None): assert np.all( - np.equal( - m.eval(), - mt if mt is not None else self.data.as_matrix() - ) - ) + np.equal(m.eval(), mt if mt is not None else self.data.as_matrix()) + ) assert l == list(lt or self.data.columns) def test_numpy_init(self): m, l = utils.any_to_tensor_and_labels(self.data.as_matrix()) - self.assertMatrixLabels(m, l, lt=['x0', 'x1']) - m, l = utils.any_to_tensor_and_labels(self.data.as_matrix(), labels=['x2', 'x3']) - self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=["x0", "x1"]) + m, l = utils.any_to_tensor_and_labels( + self.data.as_matrix(), labels=["x2", "x3"] + ) + self.assertMatrixLabels(m, l, lt=["x2", "x3"]) def test_pandas_init(self): m, l = utils.any_to_tensor_and_labels(self.data) self.assertMatrixLabels(m, l) - m, l = utils.any_to_tensor_and_labels(self.data, labels=['x2', 'x3']) - self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + m, l = utils.any_to_tensor_and_labels(self.data, labels=["x2", "x3"]) + self.assertMatrixLabels(m, l, lt=["x2", "x3"]) def test_dict_input(self): - m, l = utils.any_to_tensor_and_labels(self.data.to_dict('dict')) + m, l = utils.any_to_tensor_and_labels(self.data.to_dict("dict")) self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) - m, l = utils.any_to_tensor_and_labels(self.data.to_dict('series')) + m, l = utils.any_to_tensor_and_labels(self.data.to_dict("series")) self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) - m, l = utils.any_to_tensor_and_labels(self.data.to_dict('list')) + m, l = utils.any_to_tensor_and_labels(self.data.to_dict("list")) self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) - inp = {k: tt.as_tensor_variable(v) for k, v in self.data.to_dict('series').items()} + inp = { + k: tt.as_tensor_variable(v) for k, v in self.data.to_dict("series").items() + } m, l = utils.any_to_tensor_and_labels(inp) self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l) def test_list_input(self): m, l = utils.any_to_tensor_and_labels(self.data.as_matrix().tolist()) - self.assertMatrixLabels(m, l, lt=['x0', 'x1']) - m, l = utils.any_to_tensor_and_labels(self.data.as_matrix().tolist(), labels=['x2', 'x3']) - self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=["x0", "x1"]) + m, l = utils.any_to_tensor_and_labels( + self.data.as_matrix().tolist(), labels=["x2", "x3"] + ) + self.assertMatrixLabels(m, l, lt=["x2", "x3"]) def test_tensor_input(self): m, l = utils.any_to_tensor_and_labels( - tt.as_tensor_variable(self.data.as_matrix().tolist()), - labels=['x0', 'x1'] + tt.as_tensor_variable(self.data.as_matrix().tolist()), labels=["x0", "x1"] ) - self.assertMatrixLabels(m, l, lt=['x0', 'x1']) + self.assertMatrixLabels(m, l, lt=["x0", "x1"]) m, l = utils.any_to_tensor_and_labels( - tt.as_tensor_variable(self.data.as_matrix().tolist()), - labels=['x2', 'x3']) - self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + tt.as_tensor_variable(self.data.as_matrix().tolist()), labels=["x2", "x3"] + ) + self.assertMatrixLabels(m, l, lt=["x2", "x3"]) def test_user_mistakes(self): # no labels for tensor variable - with pytest.raises( - ValueError): - utils.any_to_tensor_and_labels(tt.as_tensor_variable(self.data.as_matrix().tolist())) + with pytest.raises(ValueError): + utils.any_to_tensor_and_labels( + tt.as_tensor_variable(self.data.as_matrix().tolist()) + ) # len of labels is bad - with pytest.raises( - ValueError): - utils.any_to_tensor_and_labels(self.data.as_matrix().tolist(), - labels=['x']) + with pytest.raises(ValueError): + utils.any_to_tensor_and_labels(self.data.as_matrix().tolist(), labels=["x"]) diff --git a/pymc3/tests/test_ndarray_backend.py b/pymc3/tests/test_ndarray_backend.py index 12b295551a..7fddc75906 100644 --- a/pymc3/tests/test_ndarray_backend.py +++ b/pymc3/tests/test_ndarray_backend.py @@ -6,17 +6,9 @@ import pytest -STATS1 = [{ - 'a': np.float64, - 'b': np.bool -}] +STATS1 = [{"a": np.float64, "b": np.bool}] -STATS2 = [{ - 'a': np.float64 -}, { - 'a': np.float64, - 'b': np.int64, -}] +STATS2 = [{"a": np.float64}, {"a": np.float64, "b": np.int64}] class TestNDArray0dSampling(bf.SamplingTestCase): @@ -128,7 +120,7 @@ class TestMultiTrace_add_remove_values(bf.ModelBackendSampledTestCase): def test_add_values(self): mtrace = self.mtrace orig_varnames = list(mtrace.varnames) - name = 'new_var' + name = "new_var" vals = mtrace[orig_varnames[0]] mtrace.add_values({name: vals}) assert len(orig_varnames) == len(mtrace.varnames) - 1 @@ -140,7 +132,6 @@ def test_add_values(self): class TestSqueezeCat(object): - def setup_method(self): self.x = np.arange(10) self.y = np.arange(10, 20) @@ -170,13 +161,14 @@ def test_combine_true_squeeze_true(self): result = base._squeeze_cat([self.x, self.y], True, True) npt.assert_equal(result, expected) + class TestSaveLoad(object): @staticmethod def model(): with pm.Model() as model: - x = pm.Normal('x', 0, 1) - y = pm.Normal('y', x, 1, observed=2) - z = pm.Normal('z', x + y, 1) + x = pm.Normal("x", 0, 1) + y = pm.Normal("y", x, 1, observed=2) + z = pm.Normal("z", x + y, 1) return model @classmethod @@ -185,12 +177,12 @@ def setup_class(cls): cls.trace = pm.sample() def test_save_new_model(self, tmpdir_factory): - directory = str(tmpdir_factory.mktemp('data')) + directory = str(tmpdir_factory.mktemp("data")) save_dir = pm.save_trace(self.trace, directory, overwrite=True) assert save_dir == directory with pm.Model() as model: - w = pm.Normal('w', 0, 1) + w = pm.Normal("w", 0, 1) new_trace = pm.sample() with pytest.raises(OSError): @@ -200,21 +192,21 @@ def test_save_new_model(self, tmpdir_factory): with model: new_trace_copy = pm.load_trace(directory) - assert (new_trace['w'] == new_trace_copy['w']).all() + assert (new_trace["w"] == new_trace_copy["w"]).all() def test_save_and_load(self, tmpdir_factory): - directory = str(tmpdir_factory.mktemp('data')) + directory = str(tmpdir_factory.mktemp("data")) save_dir = pm.save_trace(self.trace, directory, overwrite=True) assert save_dir == directory trace2 = pm.load_trace(directory, model=TestSaveLoad.model()) - for var in ('x', 'z'): + for var in ("x", "z"): assert (self.trace[var] == trace2[var]).all() def test_sample_posterior_predictive(self, tmpdir_factory): - directory = str(tmpdir_factory.mktemp('data')) + directory = str(tmpdir_factory.mktemp("data")) save_dir = pm.save_trace(self.trace, directory, overwrite=True) assert save_dir == directory diff --git a/pymc3/tests/test_parallel_sampling.py b/pymc3/tests/test_parallel_sampling.py index 515c130a90..eeceb3e23f 100644 --- a/pymc3/tests/test_parallel_sampling.py +++ b/pymc3/tests/test_parallel_sampling.py @@ -6,39 +6,39 @@ import pymc3 as pm -@pytest.mark.skipif(sys.version_info < (3,3), - reason="requires python3.3") +@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3") def test_abort(): with pm.Model() as model: - a = pm.Normal('a', shape=1) - pm.HalfNormal('b') + a = pm.Normal("a", shape=1) + pm.HalfNormal("b") step1 = pm.NUTS([a]) step2 = pm.Metropolis([model.b_log__]) step = pm.CompoundStep([step1, step2]) - proc = ps.ProcessAdapter(10, 10, step, chain=3, seed=1, - start={'a': 1., 'b_log__': 2.}) + proc = ps.ProcessAdapter( + 10, 10, step, chain=3, seed=1, start={"a": 1.0, "b_log__": 2.0} + ) proc.start() proc.write_next() proc.abort() proc.join() -@pytest.mark.skipif(sys.version_info < (3,3), - reason="requires python3.3") +@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3") def test_explicit_sample(): with pm.Model() as model: - a = pm.Normal('a', shape=1) - pm.HalfNormal('b') + a = pm.Normal("a", shape=1) + pm.HalfNormal("b") step1 = pm.NUTS([a]) step2 = pm.Metropolis([model.b_log__]) step = pm.CompoundStep([step1, step2]) start = time.time() - proc = ps.ProcessAdapter(10, 10, step, chain=3, seed=1, - start={'a': 1., 'b_log__': 2.}) + proc = ps.ProcessAdapter( + 10, 10, step, chain=3, seed=1, start={"a": 1.0, "b_log__": 2.0} + ) proc.start() while True: proc.write_next() @@ -52,21 +52,19 @@ def test_explicit_sample(): print(time.time() - start) -@pytest.mark.skipif(sys.version_info < (3,3), - reason="requires python3.3") +@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3") def test_iterator(): with pm.Model() as model: - a = pm.Normal('a', shape=1) - pm.HalfNormal('b') + a = pm.Normal("a", shape=1) + pm.HalfNormal("b") step1 = pm.NUTS([a]) step2 = pm.Metropolis([model.b_log__]) step = pm.CompoundStep([step1, step2]) start = time.time() - start = {'a': 1., 'b_log__': 2.} - sampler = ps.ParallelSampler(10, 10, 3, 2, [2, 3, 4], [start] * 3, - step, 0, False) + start = {"a": 1.0, "b_log__": 2.0} + sampler = ps.ParallelSampler(10, 10, 3, 2, [2, 3, 4], [start] * 3, step, 0, False) with sampler: for draw in sampler: pass diff --git a/pymc3/tests/test_pickling.py b/pymc3/tests/test_pickling.py index c3274af14e..5249af834f 100644 --- a/pymc3/tests/test_pickling.py +++ b/pymc3/tests/test_pickling.py @@ -9,12 +9,13 @@ def setup_method(self): def test_model_roundtrip(self): m = self.model - for proto in range(pickle.HIGHEST_PROTOCOL+1): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): try: s = pickle.dumps(m, proto) pickle.loads(s) except Exception: raise AssertionError( - "Exception while trying roundtrip with pickle protocol %d:\n" % proto + - ''.join(traceback.format_exc()) + "Exception while trying roundtrip with pickle protocol %d:\n" + % proto + + "".join(traceback.format_exc()) ) diff --git a/pymc3/tests/test_plots.py b/pymc3/tests/test_plots.py index fa5e9100c3..03baf8a332 100644 --- a/pymc3/tests/test_plots.py +++ b/pymc3/tests/test_plots.py @@ -1,12 +1,21 @@ import matplotlib -matplotlib.use('Agg', warn=False) # noqa + +matplotlib.use("Agg", warn=False) # noqa import numpy as np import pymc3 as pm from .checks import close_to from .models import multidimensional_model, simple_categorical -from ..plots import traceplot, forestplot, autocorrplot, plot_posterior, energyplot, densityplot, pairplot +from ..plots import ( + traceplot, + forestplot, + autocorrplot, + plot_posterior, + energyplot, + densityplot, + pairplot, +) from ..plots.utils import make_2d from ..step_methods import Slice, Metropolis from ..sampling import sample @@ -30,7 +39,8 @@ def test_plots(): plot_posterior(trace) autocorrplot(trace) energyplot(trace) - densityplot(trace) + densityplot(trace) + def test_energyplot(): with asmod.build_model(): @@ -38,7 +48,7 @@ def test_energyplot(): energyplot(trace) energyplot(trace, shade=0.5, alpha=0) - energyplot(trace, kind='hist') + energyplot(trace, kind="hist") def test_plots_categorical(): @@ -60,25 +70,27 @@ def test_plots_multidimensional(): h = np.diag(find_hessian(start)) step = Metropolis(model.vars, h) trace = sample(3000, tune=0, step=step, start=start) - + traceplot(trace) plot_posterior(trace) forestplot(trace) densityplot(trace) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on GPU due to cores=2") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on GPU due to cores=2" +) def test_multichain_plots(): model = build_disaster_model() with model: # Run sampler step1 = Slice([model.early_mean_log__, model.late_mean_log__]) step2 = Metropolis([model.switchpoint]) - start = {'early_mean': 2., 'late_mean': 3., 'switchpoint': 50} + start = {"early_mean": 2.0, "late_mean": 3.0, "switchpoint": 50} ptrace = sample(1000, tune=0, step=[step1, step2], start=start, cores=2) - forestplot(ptrace, varnames=['early_mean', 'late_mean']) - autocorrplot(ptrace, varnames=['switchpoint']) + forestplot(ptrace, varnames=["early_mean", "late_mean"]) + autocorrplot(ptrace, varnames=["switchpoint"]) plot_posterior(ptrace) @@ -97,7 +109,7 @@ def test_make_2d(): def test_plots_transformed(): with pm.Model(): - pm.Uniform('x', 0, 1) + pm.Uniform("x", 0, 1) step = pm.Metropolis() trace = pm.sample(100, tune=0, step=step, chains=1) @@ -106,10 +118,10 @@ def test_plots_transformed(): assert autocorrplot(trace).shape == (1, 1) assert autocorrplot(trace, plot_transformed=True).shape == (2, 1) assert plot_posterior(trace).numCols == 1 - assert plot_posterior(trace, plot_transformed=True).shape == (2, ) + assert plot_posterior(trace, plot_transformed=True).shape == (2,) with pm.Model(): - pm.Uniform('x', 0, 1) + pm.Uniform("x", 0, 1) step = pm.Metropolis() trace = pm.sample(100, tune=0, step=step, chains=2) @@ -118,17 +130,17 @@ def test_plots_transformed(): assert autocorrplot(trace).shape == (1, 2) assert autocorrplot(trace, plot_transformed=True).shape == (2, 2) assert plot_posterior(trace).numCols == 1 - assert plot_posterior(trace, plot_transformed=True).shape == (2, ) + assert plot_posterior(trace, plot_transformed=True).shape == (2,) + def test_pairplot(): with pm.Model() as model: - a = pm.Normal('a', shape=2) - c = pm.HalfNormal('c', shape=2) - b = pm.Normal('b', a, c, shape=2) - d = pm.Normal('d', 100, 1) + a = pm.Normal("a", shape=2) + c = pm.HalfNormal("c", shape=2) + b = pm.Normal("b", a, c, shape=2) + d = pm.Normal("d", 100, 1) trace = pm.sample(1000) pairplot(trace) pairplot(trace, hexbin=True, plot_transformed=True) - pairplot(trace, sub_varnames=['a_0', 'c_0', 'b_1']) - \ No newline at end of file + pairplot(trace, sub_varnames=["a_0", "c_0", "b_1"]) diff --git a/pymc3/tests/test_posdef_sym.py b/pymc3/tests/test_posdef_sym.py index 6ae5c731e2..e2ceb7a5ad 100644 --- a/pymc3/tests/test_posdef_sym.py +++ b/pymc3/tests/test_posdef_sym.py @@ -4,12 +4,12 @@ def test_posdef_symmetric1(): - data = np.array([[1., 0], [0, 1]], dtype=theano.config.floatX) + data = np.array([[1.0, 0], [0, 1]], dtype=theano.config.floatX) assert mv.posdef(data) == 1 def test_posdef_symmetric2(): - data = np.array([[1., 2], [2, 1]], dtype=theano.config.floatX) + data = np.array([[1.0, 2], [2, 1]], dtype=theano.config.floatX) assert mv.posdef(data) == 0 @@ -18,13 +18,11 @@ def test_posdef_symmetric3(): Is this correct? """ - data = np.array([[1., 1], [1, 1]], dtype=theano.config.floatX) + data = np.array([[1.0, 1], [1, 1]], dtype=theano.config.floatX) assert mv.posdef(data) == 0 def test_posdef_symmetric4(): - d = np.array([[1, .99, 1], - [.99, 1, .999], - [1, .999, 1]], theano.config.floatX) + d = np.array([[1, 0.99, 1], [0.99, 1, 0.999], [1, 0.999, 1]], theano.config.floatX) assert mv.posdef(d) == 0 diff --git a/pymc3/tests/test_posteriors.py b/pymc3/tests/test_posteriors.py index 5d2c6ef356..90ed193226 100644 --- a/pymc3/tests/test_posteriors.py +++ b/pymc3/tests/test_posteriors.py @@ -2,7 +2,10 @@ from . import sampler_fixtures as sf import theano -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestNUTSUniform(sf.NutsFixture, sf.UniformFixture): n_samples = 10000 tune = 1000 @@ -34,19 +37,19 @@ class TestSliceUniform(sf.SliceFixture, sf.UniformFixture): class TestNUTSUniform2(TestNUTSUniform): - step_args = {'target_accept': 0.95, 'integrator': 'two-stage'} + step_args = {"target_accept": 0.95, "integrator": "two-stage"} class TestNUTSUniform3(TestNUTSUniform): - step_args = {'target_accept': 0.80, 'integrator': 'two-stage'} + step_args = {"target_accept": 0.80, "integrator": "two-stage"} class TestNUTSUniform4(TestNUTSUniform): - step_args = {'target_accept': 0.95, 'integrator': 'three-stage'} + step_args = {"target_accept": 0.95, "integrator": "three-stage"} class TestNUTSUniform5(TestNUTSUniform): - step_args = {'target_accept': 0.80, 'integrator': 'three-stage'} + step_args = {"target_accept": 0.80, "integrator": "three-stage"} class TestNUTSNormal(sf.NutsFixture, sf.NormalFixture): @@ -78,7 +81,7 @@ class TestNUTSStudentT(sf.NutsFixture, sf.StudentTFixture): atol = 0.05 -@pytest.mark.skip('Takes too long to run') +@pytest.mark.skip("Takes too long to run") class TestNUTSNormalLong(sf.NutsFixture, sf.NormalFixture): n_samples = 500000 tune = 5000 diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py index c8838f0418..a800691674 100644 --- a/pymc3/tests/test_quadpotential.py +++ b/pymc3/tests/test_quadpotential.py @@ -38,16 +38,16 @@ def test_equal_diag(): x = floatX(np.random.randn(5)) pots = [ quadpotential.quad_potential(diag, False), - quadpotential.quad_potential(1. / diag, True), + quadpotential.quad_potential(1.0 / diag, True), quadpotential.quad_potential(np.diag(diag), False), - quadpotential.quad_potential(np.diag(1. / diag), True), + quadpotential.quad_potential(np.diag(1.0 / diag), True), ] if quadpotential.chol_available: - diag_ = scipy.sparse.csc_matrix(np.diag(1. / diag)) + diag_ = scipy.sparse.csc_matrix(np.diag(1.0 / diag)) pots.append(quadpotential.quad_potential(diag_, True)) - v = np.diag(1. / diag).dot(x) - e = x.dot(np.diag(1. / diag).dot(x)) / 2 + v = np.diag(1.0 / diag).dot(x) + e = x.dot(np.diag(1.0 / diag).dot(x)) / 2 for pot in pots: v_ = pot.velocity(x) e_ = pot.energy(x) @@ -85,9 +85,9 @@ def test_random_diag(): np.random.seed(42) pots = [ quadpotential.quad_potential(d, True), - quadpotential.quad_potential(1./d, False), + quadpotential.quad_potential(1.0 / d, False), quadpotential.quad_potential(np.diag(d), True), - quadpotential.quad_potential(np.diag(1./d), False), + quadpotential.quad_potential(np.diag(1.0 / d), False), ] if quadpotential.chol_available: d_ = scipy.sparse.csc_matrix(np.diag(d)) @@ -95,7 +95,7 @@ def test_random_diag(): pots.append(pot) for pot in pots: vals = np.array([pot.random() for _ in range(1000)]) - npt.assert_allclose(vals.std(0), np.sqrt(1./d), atol=0.1) + npt.assert_allclose(vals.std(0), np.sqrt(1.0 / d), atol=0.1) def test_random_dense(): diff --git a/pymc3/tests/test_random.py b/pymc3/tests/test_random.py index f61fe3b4a7..10097e7606 100644 --- a/pymc3/tests/test_random.py +++ b/pymc3/tests/test_random.py @@ -10,28 +10,28 @@ def test_draw_value(): npt.assert_equal(_draw_value(np.array([5, 6])), [5, 6]) - npt.assert_equal(_draw_value(np.array(5.)), 5) + npt.assert_equal(_draw_value(np.array(5.0)), 5) - npt.assert_equal(_draw_value(tt.constant([5., 6.])), [5, 6]) + npt.assert_equal(_draw_value(tt.constant([5.0, 6.0])), [5, 6]) assert _draw_value(tt.constant(5)) == 5 - npt.assert_equal(_draw_value(2 * tt.constant([5., 6.])), [10, 12]) + npt.assert_equal(_draw_value(2 * tt.constant([5.0, 6.0])), [10, 12]) - val = theano.shared(np.array([5., 6.])) + val = theano.shared(np.array([5.0, 6.0])) npt.assert_equal(_draw_value(val), [5, 6]) npt.assert_equal(_draw_value(2 * val), [10, 12]) - a = tt.scalar('a') + a = tt.scalar("a") a.tag.test_value = 6 npt.assert_equal(_draw_value(2 * a, givens=[(a, 1)]), 2) assert _draw_value(5) == 5 - assert _draw_value(5.) == 5 - assert isinstance(_draw_value(5.), type(5.)) + assert _draw_value(5.0) == 5 + assert isinstance(_draw_value(5.0), type(5.0)) assert isinstance(_draw_value(5), type(5)) with pm.Model(): - mu = 2 * tt.constant(np.array([5., 6.])) + theano.shared(np.array(5)) - a = pm.Normal('a', mu=mu, sd=5, shape=2) + mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) + a = pm.Normal("a", mu=mu, sd=5, shape=2) val1 = _draw_value(a) val2 = _draw_value(a) @@ -39,7 +39,7 @@ def test_draw_value(): with pytest.raises(ValueError) as err: _draw_value([]) - err.match('Unexpected type') + err.match("Unexpected type") class TestDrawValues(object): @@ -48,43 +48,50 @@ def test_empty(self): def test_vals(self): npt.assert_equal(draw_values([np.array([5, 6])])[0], [5, 6]) - npt.assert_equal(draw_values([np.array(5.)])[0], 5) + npt.assert_equal(draw_values([np.array(5.0)])[0], 5) - npt.assert_equal(draw_values([tt.constant([5., 6.])])[0], [5, 6]) + npt.assert_equal(draw_values([tt.constant([5.0, 6.0])])[0], [5, 6]) assert draw_values([tt.constant(5)])[0] == 5 - npt.assert_equal(draw_values([2 * tt.constant([5., 6.])])[0], [10, 12]) + npt.assert_equal(draw_values([2 * tt.constant([5.0, 6.0])])[0], [10, 12]) - val = theano.shared(np.array([5., 6.])) + val = theano.shared(np.array([5.0, 6.0])) npt.assert_equal(draw_values([val])[0], [5, 6]) npt.assert_equal(draw_values([2 * val])[0], [10, 12]) def test_simple_model(self): with pm.Model(): - mu = 2 * tt.constant(np.array([5., 6.])) + theano.shared(np.array(5)) - a = pm.Normal('a', mu=mu, sd=5, shape=2) + mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) + a = pm.Normal("a", mu=mu, sd=5, shape=2) val1 = draw_values([a]) val2 = draw_values([a]) assert np.all(val1[0] != val2[0]) - point = {'a': np.array([3., 4.])} - npt.assert_equal(draw_values([a], point=point), [point['a']]) + point = {"a": np.array([3.0, 4.0])} + npt.assert_equal(draw_values([a], point=point), [point["a"]]) def test_dep_vars(self): with pm.Model(): - mu = 2 * tt.constant(np.array([5., 6.])) + theano.shared(np.array(5)) - sd = pm.HalfNormal('sd', shape=2) + mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) + sd = pm.HalfNormal("sd", shape=2) tau = 1 / sd ** 2 - a = pm.Normal('a', mu=mu, tau=tau, shape=2) + a = pm.Normal("a", mu=mu, tau=tau, shape=2) - point = {'a': np.array([1., 2.])} - npt.assert_equal(draw_values([a], point=point), [point['a']]) + point = {"a": np.array([1.0, 2.0])} + npt.assert_equal(draw_values([a], point=point), [point["a"]]) val1 = draw_values([a])[0] - val2 = draw_values([a], point={'sd': np.array([2., 3.])})[0] - val3 = draw_values([a], point={'sd_log__': np.array([2., 3.])})[0] - val4 = draw_values([a], point={'sd_log__': np.array([2., 3.])})[0] - - assert all([np.all(val1 != val2), np.all(val1 != val3), - np.all(val1 != val4), np.all(val2 != val3), - np.all(val2 != val4), np.all(val3 != val4)]) + val2 = draw_values([a], point={"sd": np.array([2.0, 3.0])})[0] + val3 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0] + val4 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0] + + assert all( + [ + np.all(val1 != val2), + np.all(val1 != val3), + np.all(val1 != val4), + np.all(val2 != val3), + np.all(val2 != val4), + np.all(val3 != val4), + ] + ) diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 4adb5ed487..cadafbe810 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -18,7 +18,9 @@ import pytest -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestSample(SeededTest): def setup_method(self): super(TestSample, self).setup_method() @@ -43,10 +45,10 @@ def test_parallel_sample_does_not_reuse_seed(self): trace = pm.sample(100, tune=0, cores=cores) # numpy thread mentioned race condition. might as well check none are equal for first, second in combinations(range(cores), 2): - first_chain = trace.get_values('x', chains=first) - second_chain = trace.get_values('x', chains=second) + first_chain = trace.get_values("x", chains=first) + second_chain = trace.get_values("x", chains=second) assert not (first_chain == second_chain).all() - draws.append(trace.get_values('x')) + draws.append(trace.get_values("x")) random_numbers.append(np.random.random()) # Make sure future random processes aren't effected by this @@ -58,77 +60,90 @@ def test_sample(self): with self.model: for cores in test_cores: for steps in [1, 10, 300]: - pm.sample(steps, tune=0, step=self.step, cores=cores, - random_seed=self.random_seed) + pm.sample( + steps, + tune=0, + step=self.step, + cores=cores, + random_seed=self.random_seed, + ) def test_sample_init(self): with self.model: - for init in ('advi', 'advi_map', 'map', 'nuts'): - pm.sample(init=init, tune=0, - n_init=1000, draws=50, - random_seed=self.random_seed) + for init in ("advi", "advi_map", "map", "nuts"): + pm.sample( + init=init, + tune=0, + n_init=1000, + draws=50, + random_seed=self.random_seed, + ) def test_sample_args(self): with self.model: with pytest.raises(TypeError) as excinfo: - pm.sample(50, tune=0, init=None, step_kwargs={'nuts': {'foo': 1}}) + pm.sample(50, tune=0, init=None, step_kwargs={"nuts": {"foo": 1}}) assert "'foo'" in str(excinfo.value) with pytest.raises(ValueError) as excinfo: - pm.sample(50, tune=0, init=None, step_kwargs={'foo': {}}) - assert 'foo' in str(excinfo.value) + pm.sample(50, tune=0, init=None, step_kwargs={"foo": {}}) + assert "foo" in str(excinfo.value) - pm.sample(10, tune=0, init=None, nuts_kwargs={'target_accept': 0.9}) + pm.sample(10, tune=0, init=None, nuts_kwargs={"target_accept": 0.9}) with pytest.raises(ValueError) as excinfo: pm.sample(5, tune=0, init=None, step_kwargs={}, nuts_kwargs={}) - assert 'Specify only one' in str(excinfo.value) + assert "Specify only one" in str(excinfo.value) def test_iter_sample(self): with self.model: - samps = pm.sampling.iter_sample(draws=5, step=self.step, - start=self.start, tune=0, - random_seed=self.random_seed) + samps = pm.sampling.iter_sample( + draws=5, + step=self.step, + start=self.start, + tune=0, + random_seed=self.random_seed, + ) for i, trace in enumerate(samps): assert i == len(trace) - 1, "Trace does not have correct length." def test_parallel_start(self): with self.model: - tr = pm.sample(0, tune=5, cores=2, - discard_tuned_samples=False, - start=[{'x': [10, 10]}, {'x': [-10, -10]}], - random_seed=self.random_seed) - assert tr.get_values('x', chains=0)[0][0] > 0 - assert tr.get_values('x', chains=1)[0][0] < 0 + tr = pm.sample( + 0, + tune=5, + cores=2, + discard_tuned_samples=False, + start=[{"x": [10, 10]}, {"x": [-10, -10]}], + random_seed=self.random_seed, + ) + assert tr.get_values("x", chains=0)[0][0] > 0 + assert tr.get_values("x", chains=1)[0][0] < 0 def test_sample_tune_len(self): with self.model: trace = pm.sample(draws=100, tune=50, cores=1) assert len(trace) == 100 - trace = pm.sample(draws=100, tune=50, cores=1, - discard_tuned_samples=False) + trace = pm.sample(draws=100, tune=50, cores=1, discard_tuned_samples=False) assert len(trace) == 150 trace = pm.sample(draws=100, tune=50, cores=4) assert len(trace) == 100 @pytest.mark.parametrize( - 'start, error', [ + "start, error", + [ ([1, 2], TypeError), - ({'x': 1}, ValueError), - ({'x': [1, 2, 3]}, ValueError), - ({'x': np.array([[1, 1], [1, 1]])}, ValueError) - ] + ({"x": 1}, ValueError), + ({"x": [1, 2, 3]}, ValueError), + ({"x": np.array([[1, 1], [1, 1]])}, ValueError), + ], ) def test_sample_start_bad_shape(self, start, error): with pytest.raises(error): pm.sampling._check_start_shape(self.model, start) @pytest.mark.parametrize( - 'start', [ - {'x': np.array([1, 1])}, - {'x': [10, 10]}, - {'x': [-10, -10]}, - ] + "start", [{"x": np.array([1, 1])}, {"x": [10, 10]}, {"x": [-10, -10]}] ) def test_sample_start_good_shape(self, start): pm.sampling._check_start_shape(self.model, start) @@ -136,86 +151,101 @@ def test_sample_start_good_shape(self, start): def test_empty_model(): with pm.Model(): - pm.Normal('a', observed=1) + pm.Normal("a", observed=1) with pytest.raises(ValueError) as error: pm.sample() - error.match('any free variables') + error.match("any free variables") def test_partial_trace_sample(): with pm.Model() as model: - a = pm.Normal('a', mu=0, sd=1) - b = pm.Normal('b', mu=0, sd=1) + a = pm.Normal("a", mu=0, sd=1) + b = pm.Normal("b", mu=0, sd=1) trace = pm.sample(trace=[a]) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestNamedSampling(SeededTest): def test_shared_named(self): - G_var = shared(value=np.atleast_2d(1.), broadcastable=(True, False), - name="G") + G_var = shared(value=np.atleast_2d(1.0), broadcastable=(True, False), name="G") with pm.Model(): - theta0 = pm.Normal('theta0', mu=np.atleast_2d(0), - tau=np.atleast_2d(1e20), shape=(1, 1), - testval=np.atleast_2d(0)) - theta = pm.Normal('theta', mu=tt.dot(G_var, theta0), - tau=np.atleast_2d(1e20), shape=(1, 1)) + theta0 = pm.Normal( + "theta0", + mu=np.atleast_2d(0), + tau=np.atleast_2d(1e20), + shape=(1, 1), + testval=np.atleast_2d(0), + ) + theta = pm.Normal( + "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + ) res = theta.random() - assert np.isclose(res, 0.) + assert np.isclose(res, 0.0) def test_shared_unnamed(self): - G_var = shared(value=np.atleast_2d(1.), broadcastable=(True, False)) + G_var = shared(value=np.atleast_2d(1.0), broadcastable=(True, False)) with pm.Model(): - theta0 = pm.Normal('theta0', mu=np.atleast_2d(0), - tau=np.atleast_2d(1e20), shape=(1, 1), - testval=np.atleast_2d(0)) - theta = pm.Normal('theta', mu=tt.dot(G_var, theta0), - tau=np.atleast_2d(1e20), shape=(1, 1)) + theta0 = pm.Normal( + "theta0", + mu=np.atleast_2d(0), + tau=np.atleast_2d(1e20), + shape=(1, 1), + testval=np.atleast_2d(0), + ) + theta = pm.Normal( + "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + ) res = theta.random() - assert np.isclose(res, 0.) + assert np.isclose(res, 0.0) def test_constant_named(self): - G_var = tt.constant(np.atleast_2d(1.), name="G") + G_var = tt.constant(np.atleast_2d(1.0), name="G") with pm.Model(): - theta0 = pm.Normal('theta0', mu=np.atleast_2d(0), - tau=np.atleast_2d(1e20), shape=(1, 1), - testval=np.atleast_2d(0)) - theta = pm.Normal('theta', mu=tt.dot(G_var, theta0), - tau=np.atleast_2d(1e20), shape=(1, 1)) + theta0 = pm.Normal( + "theta0", + mu=np.atleast_2d(0), + tau=np.atleast_2d(1e20), + shape=(1, 1), + testval=np.atleast_2d(0), + ) + theta = pm.Normal( + "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + ) res = theta.random() - assert np.isclose(res, 0.) + assert np.isclose(res, 0.0) class TestChooseBackend(object): def test_choose_backend_none(self): - with mock.patch('pymc3.sampling.NDArray') as nd: - pm.sampling._choose_backend(None, 'chain') + with mock.patch("pymc3.sampling.NDArray") as nd: + pm.sampling._choose_backend(None, "chain") assert nd.called def test_choose_backend_list_of_variables(self): - with mock.patch('pymc3.sampling.NDArray') as nd: - pm.sampling._choose_backend(['var1', 'var2'], 'chain') - nd.assert_called_with(vars=['var1', 'var2']) + with mock.patch("pymc3.sampling.NDArray") as nd: + pm.sampling._choose_backend(["var1", "var2"], "chain") + nd.assert_called_with(vars=["var1", "var2"]) def test_choose_backend_invalid(self): with pytest.raises(ValueError): - pm.sampling._choose_backend('invalid', 'chain') + pm.sampling._choose_backend("invalid", "chain") def test_choose_backend_shortcut(self): backend = mock.Mock() - shortcuts = {'test_backend': {'backend': backend, - 'name': None}} - pm.sampling._choose_backend('test_backend', 'chain', shortcuts=shortcuts) + shortcuts = {"test_backend": {"backend": backend, "name": None}} + pm.sampling._choose_backend("test_backend", "chain", shortcuts=shortcuts) assert backend.called class TestSamplePPC(SeededTest): def test_normal_scalar(self): with pm.Model() as model: - mu = pm.Normal('mu', 0., 1.) - a = pm.Normal('a', mu=mu, sd=1, observed=0.) + mu = pm.Normal("mu", 0.0, 1.0) + a = pm.Normal("a", mu=mu, sd=1, observed=0.0) trace = pm.sample() with model: @@ -224,21 +254,19 @@ def test_normal_scalar(self): ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[]) assert len(ppc) == 0 ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[a]) - assert 'a' in ppc - assert ppc['a'].shape == (1000,) - _, pval = stats.kstest(ppc['a'], - stats.norm(loc=0, scale=np.sqrt(2)).cdf) + assert "a" in ppc + assert ppc["a"].shape == (1000,) + _, pval = stats.kstest(ppc["a"], stats.norm(loc=0, scale=np.sqrt(2)).cdf) assert pval > 0.001 with model: ppc = pm.sample_posterior_predictive(trace, samples=10, size=5, vars=[a]) - assert ppc['a'].shape == (10, 5) + assert ppc["a"].shape == (10, 5) def test_normal_vector(self): with pm.Model() as model: - mu = pm.Normal('mu', 0., 1.) - a = pm.Normal('a', mu=mu, sd=1, - observed=np.array([.5, .2])) + mu = pm.Normal("mu", 0.0, 1.0) + a = pm.Normal("a", mu=mu, sd=1, observed=np.array([0.5, 0.2])) trace = pm.sample() with model: @@ -247,18 +275,17 @@ def test_normal_vector(self): ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[]) assert len(ppc) == 0 ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a]) - assert 'a' in ppc - assert ppc['a'].shape == (10, 2) + assert "a" in ppc + assert ppc["a"].shape == (10, 2) ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a], size=4) - assert 'a' in ppc - assert ppc['a'].shape == (10, 4, 2) + assert "a" in ppc + assert ppc["a"].shape == (10, 4, 2) def test_vector_observed(self): with pm.Model() as model: - mu = pm.Normal('mu', mu=0, sd=1) - a = pm.Normal('a', mu=mu, sd=1, - observed=np.array([0., 1.])) + mu = pm.Normal("mu", mu=0, sd=1) + a = pm.Normal("a", mu=mu, sd=1, observed=np.array([0.0, 1.0])) trace = pm.sample() with model: @@ -267,17 +294,17 @@ def test_vector_observed(self): ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[]) assert len(ppc) == 0 ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a]) - assert 'a' in ppc - assert ppc['a'].shape == (10, 2) + assert "a" in ppc + assert ppc["a"].shape == (10, 2) ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a], size=4) - assert 'a' in ppc - assert ppc['a'].shape == (10, 4, 2) + assert "a" in ppc + assert ppc["a"].shape == (10, 4, 2) def test_sum_normal(self): with pm.Model() as model: - a = pm.Normal('a', sd=0.2) - b = pm.Normal('b', mu=a) + a = pm.Normal("a", sd=0.2) + b = pm.Normal("b", mu=a) trace = pm.sample() with model: @@ -285,9 +312,9 @@ def test_sum_normal(self): ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10) ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[b]) assert len(ppc) == 1 - assert ppc['b'].shape == (1000,) + assert ppc["b"].shape == (1000,) scale = np.sqrt(1 + 0.2 ** 2) - _, pval = stats.kstest(ppc['b'], stats.norm(scale=scale).cdf) + _, pval = stats.kstest(ppc["b"], stats.norm(scale=scale).cdf) assert pval > 0.001 @@ -296,85 +323,97 @@ def test_sample_posterior_predictive_w(self): data0 = np.random.normal(0, 1, size=500) with pm.Model() as model_0: - mu = pm.Normal('mu', mu=0, sd=1) - y = pm.Normal('y', mu=mu, sd=1, observed=data0) + mu = pm.Normal("mu", mu=0, sd=1) + y = pm.Normal("y", mu=mu, sd=1, observed=data0) trace_0 = pm.sample() with pm.Model() as model_1: - mu = pm.Normal('mu', mu=0, sd=1, shape=len(data0)) - y = pm.Normal('y', mu=mu, sd=1, observed=data0) + mu = pm.Normal("mu", mu=0, sd=1, shape=len(data0)) + y = pm.Normal("y", mu=mu, sd=1, observed=data0) trace_1 = pm.sample() traces = [trace_0, trace_0] models = [model_0, model_0] ppc = pm.sample_posterior_predictive_w(traces, 100, models) - assert ppc['y'].shape == (100, 500) + assert ppc["y"].shape == (100, 500) traces = [trace_0, trace_1] models = [model_0, model_1] ppc = pm.sample_posterior_predictive_w(traces, 100, models) - assert ppc['y'].shape == (100, 500) - - -@pytest.mark.parametrize('method', [ - 'jitter+adapt_diag', 'adapt_diag', 'advi', 'ADVI+adapt_diag', - 'advi+adapt_diag_grad', 'map', 'advi_map', 'nuts' -]) + assert ppc["y"].shape == (100, 500) + + +@pytest.mark.parametrize( + "method", + [ + "jitter+adapt_diag", + "adapt_diag", + "advi", + "ADVI+adapt_diag", + "advi+adapt_diag_grad", + "map", + "advi_map", + "nuts", + ], +) def test_exec_nuts_init(method): with pm.Model() as model: - pm.Normal('a', mu=0, sd=1, shape=2) - pm.HalfNormal('b', sd=1) + pm.Normal("a", mu=0, sd=1, shape=2) + pm.HalfNormal("b", sd=1) with model: start, _ = pm.init_nuts(init=method, n_init=10) assert isinstance(start, list) assert len(start) == 1 assert isinstance(start[0], dict) - assert 'a' in start[0] and 'b_log__' in start[0] + assert "a" in start[0] and "b_log__" in start[0] start, _ = pm.init_nuts(init=method, n_init=10, chains=2) assert isinstance(start, list) assert len(start) == 2 assert isinstance(start[0], dict) - assert 'a' in start[0] and 'b_log__' in start[0] + assert "a" in start[0] and "b_log__" in start[0] + class TestSamplePriorPredictive(SeededTest): def test_ignores_observed(self): observed = np.random.normal(10, 1, size=200) with pm.Model(): # Use a prior that's way off to show we're ignoring the observed variables - mu = pm.Normal('mu', mu=-100, sd=1) - positive_mu = pm.Deterministic('positive_mu', np.abs(mu)) + mu = pm.Normal("mu", mu=-100, sd=1) + positive_mu = pm.Deterministic("positive_mu", np.abs(mu)) z = -1 - positive_mu - pm.Normal('x_obs', mu=z, sd=1, observed=observed) + pm.Normal("x_obs", mu=z, sd=1, observed=observed) prior = pm.sample_prior_predictive() - assert (prior['mu'] < 90).all() - assert (prior['positive_mu'] > 90).all() - assert (prior['x_obs'] < 90).all() - assert prior['x_obs'].shape == (500, 200) - npt.assert_array_almost_equal(prior['positive_mu'], np.abs(prior['mu']), decimal=4) + assert (prior["mu"] < 90).all() + assert (prior["positive_mu"] > 90).all() + assert (prior["x_obs"] < 90).all() + assert prior["x_obs"].shape == (500, 200) + npt.assert_array_almost_equal( + prior["positive_mu"], np.abs(prior["mu"]), decimal=4 + ) def test_respects_shape(self): for shape in (2, (2,), (10, 2), (10, 10)): with pm.Model(): - mu = pm.Gamma('mu', 3, 1, shape=1) - goals = pm.Poisson('goals', mu, shape=shape) + mu = pm.Gamma("mu", 3, 1, shape=1) + goals = pm.Poisson("goals", mu, shape=shape) trace = pm.sample_prior_predictive(10) if shape == 2: # want to test shape as an int shape = (2,) - assert trace['goals'].shape == (10,) + shape + assert trace["goals"].shape == (10,) + shape def test_multivariate(self): with pm.Model(): - m = pm.Multinomial('m', n=5, p=np.array([0.25, 0.25, 0.25, 0.25]), shape=4) + m = pm.Multinomial("m", n=5, p=np.array([0.25, 0.25, 0.25, 0.25]), shape=4) trace = pm.sample_prior_predictive(10) assert m.random(size=10).shape == (10, 4) - assert trace['m'].shape == (10, 4) + assert trace["m"].shape == (10, 4) def test_layers(self): with pm.Model() as model: - a = pm.Uniform('a', lower=0, upper=1, shape=10) - b = pm.Binomial('b', n=1, p=a, shape=10) + a = pm.Uniform("a", lower=0, upper=1, shape=10) + b = pm.Binomial("b", n=1, p=a, shape=10) avg = b.random(size=10000).mean(axis=0) npt.assert_array_almost_equal(avg, 0.5 * np.ones_like(b), decimal=2) @@ -386,54 +425,61 @@ def test_transformed(self): draws = 50 with pm.Model() as model: - phi = pm.Beta('phi', alpha=1., beta=1.) + phi = pm.Beta("phi", alpha=1.0, beta=1.0) - kappa_log = pm.Exponential('logkappa', lam=5.) - kappa = pm.Deterministic('kappa', tt.exp(kappa_log)) + kappa_log = pm.Exponential("logkappa", lam=5.0) + kappa = pm.Deterministic("kappa", tt.exp(kappa_log)) - thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=n) + thetas = pm.Beta( + "thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=n + ) - y = pm.Binomial('y', n=at_bats, p=thetas, observed=hits) + y = pm.Binomial("y", n=at_bats, p=thetas, observed=hits) gen = pm.sample_prior_predictive(draws) - assert gen['phi'].shape == (draws,) - assert gen['y'].shape == (draws, n) - assert 'thetas_logodds__' in gen + assert gen["phi"].shape == (draws,) + assert gen["y"].shape == (draws, n) + assert "thetas_logodds__" in gen def test_shared(self): n1 = 10 - obs = shared(np.random.rand(n1) < .5) + obs = shared(np.random.rand(n1) < 0.5) draws = 50 with pm.Model() as m: - p = pm.Beta('p', 1., 1.) - y = pm.Bernoulli('y', p, observed=obs) + p = pm.Beta("p", 1.0, 1.0) + y = pm.Bernoulli("y", p, observed=obs) gen1 = pm.sample_prior_predictive(draws) - assert gen1['y'].shape == (draws, n1) + assert gen1["y"].shape == (draws, n1) n2 = 20 - obs.set_value(np.random.rand(n2) < .5) + obs.set_value(np.random.rand(n2) < 0.5) with m: gen2 = pm.sample_prior_predictive(draws) - assert gen2['y'].shape == (draws, n2) + assert gen2["y"].shape == (draws, n2) def test_density_dist(self): obs = np.random.normal(-1, 0.1, size=10) with pm.Model(): - mu = pm.Normal('mu', 0, 1) - sd = pm.Gamma('sd', 1, 2) - a = pm.DensityDist('a', pm.Normal.dist(mu, sd).logp, random=pm.Normal.dist(mu, sd).random, observed=obs) + mu = pm.Normal("mu", 0, 1) + sd = pm.Gamma("sd", 1, 2) + a = pm.DensityDist( + "a", + pm.Normal.dist(mu, sd).logp, + random=pm.Normal.dist(mu, sd).random, + observed=obs, + ) prior = pm.sample_prior_predictive() - npt.assert_almost_equal(prior['a'].mean(), 0, decimal=1) + npt.assert_almost_equal(prior["a"].mean(), 0, decimal=1) def test_shape_edgecase(self): with pm.Model(): - mu = pm.Normal('mu', shape=5) - sd = pm.Uniform('sd', lower=2, upper=3) - x = pm.Normal('x', mu=mu, sd=sd, shape=5) + mu = pm.Normal("mu", shape=5) + sd = pm.Uniform("sd", lower=2, upper=3) + x = pm.Normal("x", mu=mu, sd=sd, shape=5) prior = pm.sample_prior_predictive(10) - assert prior['mu'].shape == (10, 5) + assert prior["mu"].shape == (10, 5) diff --git a/pymc3/tests/test_sgfs.py b/pymc3/tests/test_sgfs.py index 06d8749443..421292bf40 100644 --- a/pymc3/tests/test_sgfs.py +++ b/pymc3/tests/test_sgfs.py @@ -3,34 +3,41 @@ from pymc3 import Model, Normal import theano.tensor as tt + def test_minibatch(): draws = 3000 mu0 = 1 sd0 = 1 - + def f(x, a, b, c): - return a*x**2 + b*x + c - + return a * x ** 2 + b * x + c + a, b, c = 1, 2, 3 batch_size = 50 - total_size = batch_size*500 - x_train = np.random.uniform(-10, 10, size=(total_size,)).astype('float32') + total_size = batch_size * 500 + x_train = np.random.uniform(-10, 10, size=(total_size,)).astype("float32") x_obs = pm.data.Minibatch(x_train, batch_size=batch_size) - y_train = f(x_train, a, b, c) + np.random.normal(size=x_train.shape).astype('float32') + y_train = f(x_train, a, b, c) + np.random.normal(size=x_train.shape).astype( + "float32" + ) y_obs = pm.data.Minibatch(y_train, batch_size=batch_size) with Model(): - abc = Normal('abc', mu=mu0, sd=sd0, shape=(3,)) + abc = Normal("abc", mu=mu0, sd=sd0, shape=(3,)) x = x_obs - x2 = x**2 + x2 = x ** 2 o = tt.ones_like(x) X = tt.stack([x2, x, o]).T y = X.dot(abc) - pm.Normal('y', mu=y, observed=y_obs) + pm.Normal("y", mu=y, observed=y_obs) - step_method = pm.SGFS(batch_size=batch_size, step_size=1., total_size=total_size) + step_method = pm.SGFS( + batch_size=batch_size, step_size=1.0, total_size=total_size + ) trace = pm.sample(draws=draws, step=step_method, init=None, cores=2) - np.testing.assert_allclose(np.mean(trace['abc'], axis=0), np.asarray([a, b, c]), rtol=0.1) + np.testing.assert_allclose( + np.mean(trace["abc"], axis=0), np.asarray([a, b, c]), rtol=0.1 + ) diff --git a/pymc3/tests/test_shared.py b/pymc3/tests/test_shared.py index 6106c1c68b..e4b1b3b2c9 100644 --- a/pymc3/tests/test_shared.py +++ b/pymc3/tests/test_shared.py @@ -7,9 +7,11 @@ class TestShared(SeededTest): def test_deterministic(self): with pm.Model() as model: - data_values = np.array([.5, .4, 5, 2]) - X = theano.shared(np.asarray(data_values, dtype=theano.config.floatX), borrow=True) - pm.Normal('y', 0, 1, observed=X) + data_values = np.array([0.5, 0.4, 5, 2]) + X = theano.shared( + np.asarray(data_values, dtype=theano.config.floatX), borrow=True + ) + pm.Normal("y", 0, 1, observed=X) model.logp(model.test_point) def test_sample(self): @@ -21,8 +23,8 @@ def test_sample(self): x_shared = theano.shared(x) with pm.Model() as model: - b = pm.Normal('b', 0., 10.) - pm.Normal('obs', b * x_shared, np.sqrt(1e-2), observed=y) + b = pm.Normal("b", 0.0, 10.0) + pm.Normal("obs", b * x_shared, np.sqrt(1e-2), observed=y) prior_trace0 = pm.sample_prior_predictive(1000) trace = pm.sample(1000, init=None, progressbar=False) @@ -32,10 +34,10 @@ def test_sample(self): prior_trace1 = pm.sample_prior_predictive(1000) pp_trace1 = pm.sample_posterior_predictive(trace, 1000) - assert prior_trace0['b'].shape == (1000,) - assert prior_trace0['obs'].shape == (1000, 100) - np.testing.assert_allclose(x, pp_trace0['obs'].mean(axis=0), atol=1e-1) + assert prior_trace0["b"].shape == (1000,) + assert prior_trace0["obs"].shape == (1000, 100) + np.testing.assert_allclose(x, pp_trace0["obs"].mean(axis=0), atol=1e-1) - assert prior_trace1['b'].shape == (1000,) - assert prior_trace1['obs'].shape == (1000, 200) - np.testing.assert_allclose(x_pred, pp_trace1['obs'].mean(axis=0), atol=1e-1) + assert prior_trace1["b"].shape == (1000,) + assert prior_trace1["obs"].shape == (1000, 200) + np.testing.assert_allclose(x_pred, pp_trace1["obs"].mean(axis=0), atol=1e-1) diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py index 90732f8645..987ea8fea6 100644 --- a/pymc3/tests/test_smc.py +++ b/pymc3/tests/test_smc.py @@ -6,13 +6,12 @@ class TestSMC(SeededTest): - def setup_class(self): super(TestSMC, self).setup_class() self.samples = 1000 n = 4 - mu1 = np.ones(n) * (1. / 2) - mu2 = - mu1 + mu1 = np.ones(n) * (1.0 / 2) + mu2 = -mu1 stdev = 0.1 sigma = np.power(stdev, 2) * np.eye(n) @@ -20,46 +19,46 @@ def setup_class(self): dsigma = np.linalg.det(sigma) w1 = stdev - w2 = (1 - stdev) + w2 = 1 - stdev def two_gaussians(x): - log_like1 = - 0.5 * n * tt.log(2 * np.pi) \ - - 0.5 * tt.log(dsigma) \ - - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1) - log_like2 = - 0.5 * n * tt.log(2 * np.pi) \ - - 0.5 * tt.log(dsigma) \ - - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2) + log_like1 = ( + -0.5 * n * tt.log(2 * np.pi) + - 0.5 * tt.log(dsigma) + - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1) + ) + log_like2 = ( + -0.5 * n * tt.log(2 * np.pi) + - 0.5 * tt.log(dsigma) + - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2) + ) return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2)) with pm.Model() as self.SMC_test: - X = pm.Uniform('X', lower=-2, upper=2., shape=n) - llk = pm.Potential('muh', two_gaussians(X)) + X = pm.Uniform("X", lower=-2, upper=2.0, shape=n) + llk = pm.Potential("muh", two_gaussians(X)) self.muref = mu1 - def test_sample(self): with self.SMC_test: - mtrace = pm.sample(draws=self.samples, - step = pm.SMC()) + mtrace = pm.sample(draws=self.samples, step=pm.SMC()) - x = mtrace['X'] + x = mtrace["X"] mu1d = np.abs(x).mean(axis=0) - np.testing.assert_allclose(self.muref, mu1d, rtol=0., atol=0.03) + np.testing.assert_allclose(self.muref, mu1d, rtol=0.0, atol=0.03) def test_ml(self): data = np.repeat([1, 0], [50, 50]) marginals = [] - a_prior_0, b_prior_0 = 1., 1. - a_prior_1, b_prior_1 = 20., 20. + a_prior_0, b_prior_0 = 1.0, 1.0 + a_prior_1, b_prior_1 = 20.0, 20.0 for alpha, beta in ((a_prior_0, b_prior_0), (a_prior_1, b_prior_1)): with pm.Model() as model: - a = pm.Beta('a', alpha, beta) - y = pm.Bernoulli('y', a, observed=data) + a = pm.Beta("a", alpha, beta) + y = pm.Bernoulli("y", a, observed=data) trace = pm.sample(2000, step=pm.SMC()) marginals.append(model.marginal_likelihood) # compare to the analytical result assert abs((marginals[1] / marginals[0]) - 4.0) <= 1 - - diff --git a/pymc3/tests/test_special_functions.py b/pymc3/tests/test_special_functions.py index c02f3fba0c..16aedbe9c9 100644 --- a/pymc3/tests/test_special_functions.py +++ b/pymc3/tests/test_special_functions.py @@ -9,12 +9,12 @@ def test_functions(): - xvals = list(map(np.atleast_1d, [.01, .1, 2, 100, 10000])) + xvals = list(map(np.atleast_1d, [0.01, 0.1, 2, 100, 10000])) - x = tt.dvector('x') + x = tt.dvector("x") x.tag.test_value = xvals[0] - p = tt.iscalar('p') + p = tt.iscalar("p") p.tag.test_value = 1 gammaln = function([x], ps.gammaln(x)) @@ -25,6 +25,7 @@ def test_functions(): for x in xvals[1:]: check_vals(psi, ss.psi, x) + """ scipy.special.multigammaln gives bad values if you pass a non scalar to a In [14]: @@ -37,12 +38,12 @@ def test_functions(): def t_multigamma(): - xvals = list(map(np.atleast_1d, [0, .1, 2, 100])) + xvals = list(map(np.atleast_1d, [0, 0.1, 2, 100])) - x = tt.dvector('x') + x = tt.dvector("x") x.tag.test_value = xvals[0] - p = tt.iscalar('p') + p = tt.iscalar("p") p.tag.test_value = 1 multigammaln = function([x, p], ps.multigammaln(x, p)) diff --git a/pymc3/tests/test_sqlite_backend.py b/pymc3/tests/test_sqlite_backend.py index 5106e5af19..b805b6a271 100644 --- a/pymc3/tests/test_sqlite_backend.py +++ b/pymc3/tests/test_sqlite_backend.py @@ -5,52 +5,70 @@ import pytest import theano -DBNAME = os.path.join(tempfile.gettempdir(), 'test.db') +DBNAME = os.path.join(tempfile.gettempdir(), "test.db") -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", +) class TestSQlite0dSampling(bf.SamplingTestCase): backend = sqlite.SQLite name = DBNAME shape = () -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestSQlite1dSampling(bf.SamplingTestCase): backend = sqlite.SQLite name = DBNAME shape = 2 -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", +) class TestSQlite2dSampling(bf.SamplingTestCase): backend = sqlite.SQLite name = DBNAME shape = (2, 3) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", +) class TestSQLite0dSelection(bf.SelectionTestCase): backend = sqlite.SQLite name = DBNAME shape = () -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestSQLite1dSelection(bf.SelectionTestCase): backend = sqlite.SQLite name = DBNAME shape = 2 -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestSQLite2dSelection(bf.SelectionTestCase): backend = sqlite.SQLite name = DBNAME shape = (2, 3) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", +) class TestSQLiteDumpLoad(bf.DumpLoadTestCase): backend = sqlite.SQLite load_func = staticmethod(sqlite.load) @@ -58,7 +76,10 @@ class TestSQLiteDumpLoad(bf.DumpLoadTestCase): shape = (2, 3) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to inf issues", +) class TestNDArraySqliteEquality(bf.BackendEqualityTestCase): backend0 = ndarray.NDArray name0 = None diff --git a/pymc3/tests/test_starting.py b/pymc3/tests/test_starting.py index 44819f40bb..c25b6aab8e 100644 --- a/pymc3/tests/test_starting.py +++ b/pymc3/tests/test_starting.py @@ -10,35 +10,37 @@ def test_accuracy_normal(): _, model, (mu, _) = simple_model() with model: newstart = find_MAP(Point(x=[-10.5, 100.5])) - close_to(newstart['x'], [mu, mu], select_by_precision(float64=1e-5, float32=1E-4)) + close_to( + newstart["x"], [mu, mu], select_by_precision(float64=1e-5, float32=1e-4) + ) def test_accuracy_non_normal(): _, model, (mu, _) = non_normal(4) with model: - newstart = find_MAP(Point(x=[.5, .01, .95, .99])) - close_to(newstart['x'], mu, select_by_precision(float64=1e-5, float32=1E-4)) + newstart = find_MAP(Point(x=[0.5, 0.01, 0.95, 0.99])) + close_to(newstart["x"], mu, select_by_precision(float64=1e-5, float32=1e-4)) def test_find_MAP_discrete(): - tol = 2.0**-11 + tol = 2.0 ** -11 alpha = 4 beta = 4 n = 20 yes = 15 with Model() as model: - p = Beta('p', alpha, beta) - Binomial('ss', n=n, p=p) - Binomial('s', n=n, p=p, observed=yes) + p = Beta("p", alpha, beta) + Binomial("ss", n=n, p=p) + Binomial("s", n=n, p=p, observed=yes) map_est1 = starting.find_MAP() map_est2 = starting.find_MAP(vars=model.vars) - close_to(map_est1['p'], 0.6086956533498806, tol) + close_to(map_est1["p"], 0.6086956533498806, tol) - close_to(map_est2['p'], 0.695642178810167, tol) - assert map_est2['ss'] == 14 + close_to(map_est2["p"], 0.695642178810167, tol) + assert map_est2["ss"] == 14 def test_find_MAP_no_gradient(): @@ -48,24 +50,24 @@ def test_find_MAP_no_gradient(): def test_find_MAP(): - tol = 2.0**-11 # 16 bit machine epsilon, a low bar + tol = 2.0 ** -11 # 16 bit machine epsilon, a low bar data = np.random.randn(100) # data should be roughly mean 0, std 1, but let's # normalize anyway to get it really close data = (data - np.mean(data)) / np.std(data) with Model(): - mu = Uniform('mu', -1, 1) - sigma = Uniform('sigma', .5, 1.5) - Normal('y', mu=mu, tau=sigma**-2, observed=data) + mu = Uniform("mu", -1, 1) + sigma = Uniform("sigma", 0.5, 1.5) + Normal("y", mu=mu, tau=sigma ** -2, observed=data) # Test gradient minimization map_est1 = starting.find_MAP(progressbar=False) # Test non-gradient minimization map_est2 = starting.find_MAP(progressbar=False, method="Powell") - close_to(map_est1['mu'], 0, tol) - close_to(map_est1['sigma'], 1, tol) + close_to(map_est1["mu"], 0, tol) + close_to(map_est1["sigma"], 1, tol) - close_to(map_est2['mu'], 0, tol) - close_to(map_est2['sigma'], 1, tol) + close_to(map_est2["mu"], 0, tol) + close_to(map_est2["sigma"], 1, tol) diff --git a/pymc3/tests/test_stats.py b/pymc3/tests/test_stats.py index 3a2e81e783..0206efe2a6 100644 --- a/pymc3/tests/test_stats.py +++ b/pymc3/tests/test_stats.py @@ -6,8 +6,17 @@ from .helpers import SeededTest from ..tests import backend_fixtures as bf from ..backends import ndarray -from ..stats import (summary, autocorr, autocov, hpd, mc_error, quantiles, - make_indices, bfmi, r2_score) +from ..stats import ( + summary, + autocorr, + autocov, + hpd, + mc_error, + quantiles, + make_indices, + bfmi, + r2_score, +) from ..theanof import floatX_array import pymc3.stats as pmstats from numpy.random import random, normal @@ -18,15 +27,15 @@ def test_log_post_trace(): with pm.Model() as model: - pm.Normal('y') + pm.Normal("y") trace = pm.sample(10, tune=10, chains=1) logp = pmstats._log_post_trace(trace, model) assert logp.shape == (len(trace), 0) with pm.Model() as model: - pm.Normal('a') - pm.Normal('y', observed=np.zeros((2, 3))) + pm.Normal("a") + pm.Normal("y", observed=np.zeros((2, 3))) trace = pm.sample(10, tune=10, chains=1) logp = pmstats._log_post_trace(trace, model) @@ -34,14 +43,14 @@ def test_log_post_trace(): npt.assert_allclose(logp, -0.5 * np.log(2 * np.pi), atol=1e-7) with pm.Model() as model: - pm.Normal('a') - pm.Normal('y', observed=np.zeros((2, 3))) + pm.Normal("a") + pm.Normal("y", observed=np.zeros((2, 3))) data = pd.DataFrame(np.zeros((3, 4))) data.values[1, 1] = np.nan - pm.Normal('y2', observed=data) + pm.Normal("y2", observed=data) data = data.copy() data.values[:] = np.nan - pm.Normal('y3', observed=data) + pm.Normal("y3", observed=data) trace = pm.sample(10, tune=10, chains=1) logp = pmstats._log_post_trace(trace, model) @@ -54,18 +63,18 @@ def test_compare(): x_obs = np.random.normal(0, 1, size=100) with pm.Model() as model0: - mu = pm.Normal('mu', 0, 1) - x = pm.Normal('x', mu=mu, sd=1, observed=x_obs) + mu = pm.Normal("mu", 0, 1) + x = pm.Normal("x", mu=mu, sd=1, observed=x_obs) trace0 = pm.sample(1000) with pm.Model() as model1: - mu = pm.Normal('mu', 0, 1) - x = pm.Normal('x', mu=mu, sd=0.8, observed=x_obs) + mu = pm.Normal("mu", 0, 1) + x = pm.Normal("x", mu=mu, sd=0.8, observed=x_obs) trace1 = pm.sample(1000) with pm.Model() as model2: - mu = pm.Normal('mu', 0, 1) - x = pm.StudentT('x', nu=1, mu=mu, lam=1, observed=x_obs) + mu = pm.Normal("mu", 0, 1) + x = pm.StudentT("x", nu=1, mu=mu, lam=1, observed=x_obs) trace2 = pm.sample(1000) traces = [trace0, copy.copy(trace0)] @@ -73,34 +82,34 @@ def test_compare(): model_dict = dict(zip(models, traces)) - w_st = pm.compare(model_dict, method='stacking')['weight'] - w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight'] - w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight'] + w_st = pm.compare(model_dict, method="stacking")["weight"] + w_bb_bma = pm.compare(model_dict, method="BB-pseudo-BMA")["weight"] + w_bma = pm.compare(model_dict, method="pseudo-BMA")["weight"] assert_almost_equal(w_st[0], w_st[1]) assert_almost_equal(w_bb_bma[0], w_bb_bma[1]) assert_almost_equal(w_bma[0], w_bma[1]) - assert_almost_equal(np.sum(w_st), 1.) - assert_almost_equal(np.sum(w_bb_bma), 1.) - assert_almost_equal(np.sum(w_bma), 1.) + assert_almost_equal(np.sum(w_st), 1.0) + assert_almost_equal(np.sum(w_bb_bma), 1.0) + assert_almost_equal(np.sum(w_bma), 1.0) traces = [trace0, trace1, trace2] models = [model0, model1, model2] model_dict = dict(zip(models, traces)) - - w_st = pm.compare(model_dict, method='stacking')['weight'] - w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight'] - w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight'] - assert(w_st[0] > w_st[1] > w_st[2]) - assert(w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2]) - assert(w_bma[0] > w_bma[1] > w_bma[2]) + w_st = pm.compare(model_dict, method="stacking")["weight"] + w_bb_bma = pm.compare(model_dict, method="BB-pseudo-BMA")["weight"] + w_bma = pm.compare(model_dict, method="pseudo-BMA")["weight"] - assert_almost_equal(np.sum(w_st), 1.) - assert_almost_equal(np.sum(w_st), 1.) - assert_almost_equal(np.sum(w_st), 1.) + assert w_st[0] > w_st[1] > w_st[2] + assert w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2] + assert w_bma[0] > w_bma[1] > w_bma[2] + + assert_almost_equal(np.sum(w_st), 1.0) + assert_almost_equal(np.sum(w_st), 1.0) + assert_almost_equal(np.sum(w_st), 1.0) class TestStats(SeededTest): @@ -112,12 +121,15 @@ def setup_class(cls): def test_autocorr(self): """Test autocorrelation and autocovariance functions""" assert_almost_equal(autocorr(self.normal_sample)[1], 0, 2) - y = [(self.normal_sample[i - 1] + self.normal_sample[i]) / - 2 for i in range(1, len(self.normal_sample))] + y = [ + (self.normal_sample[i - 1] + self.normal_sample[i]) / 2 + for i in range(1, len(self.normal_sample)) + ] assert_almost_equal(autocorr(np.asarray(y))[1], 0.5, 2) lag = 5 - acov_np = np.cov(self.normal_sample[:-lag], - self.normal_sample[lag:], bias=1)[0, 1] + acov_np = np.cov(self.normal_sample[:-lag], self.normal_sample[lag:], bias=1)[ + 0, 1 + ] acov_pm = autocov(self.normal_sample)[lag] assert_almost_equal(acov_pm, acov_np, 7) @@ -126,18 +138,18 @@ def test_waic(self): x_obs = np.arange(6) with pm.Model(): - p = pm.Beta('p', 1., 1., transform=None) - pm.Binomial('x', 5, p, observed=x_obs) + p = pm.Beta("p", 1.0, 1.0, transform=None) + pm.Binomial("x", 5, p, observed=x_obs) step = pm.Metropolis() trace = pm.sample(100, step) calculated_waic = pm.waic(trace) - log_py = st.binom.logpmf(np.atleast_2d(x_obs).T, 5, trace['p']).T + log_py = st.binom.logpmf(np.atleast_2d(x_obs).T, 5, trace["p"]).T lppd_i = np.log(np.mean(np.exp(log_py), axis=0)) vars_lpd = np.var(log_py, axis=0) - waic_i = - 2 * (lppd_i - vars_lpd) + waic_i = -2 * (lppd_i - vars_lpd) actual_waic_se = np.sqrt(len(waic_i) * np.var(waic_i)) actual_waic = np.sum(waic_i) @@ -157,7 +169,7 @@ def test_make_indices(self): def test_mc_error(self): """Test batch standard deviation function""" - assert(mc_error(random(100000) < 0.0025)) + assert mc_error(random(100000) < 0.0025) def test_quantiles(self): """Test quantiles function""" @@ -170,8 +182,8 @@ def test_summary_0d_variable_model(self): mu = -2.1 tau = 1.3 with Model() as model: - Normal('x', mu, tau, testval=floatX_array(.1)) - step = Metropolis(model.vars, np.diag([1.]), blocked=True) + Normal("x", mu, tau, testval=floatX_array(0.1)) + step = Metropolis(model.vars, np.diag([1.0]), blocked=True) trace = pm.sample(100, step=step) summary(trace) @@ -179,8 +191,8 @@ def test_summary_1d_variable_model(self): mu = -2.1 tau = 1.3 with Model() as model: - Normal('x', mu, tau, shape=2, testval=floatX_array([.1, .1])) - step = Metropolis(model.vars, np.diag([1.]), blocked=True) + Normal("x", mu, tau, shape=2, testval=floatX_array([0.1, 0.1])) + step = Metropolis(model.vars, np.diag([1.0]), blocked=True) trace = pm.sample(100, step=step) summary(trace) @@ -188,9 +200,10 @@ def test_summary_2d_variable_model(self): mu = -2.1 tau = 1.3 with Model() as model: - Normal('x', mu, tau, shape=(2, 2), - testval=floatX_array(np.tile(.1, (2, 2)))) - step = Metropolis(model.vars, np.diag([1.]), blocked=True) + Normal( + "x", mu, tau, shape=(2, 2), testval=floatX_array(np.tile(0.1, (2, 2))) + ) + step = Metropolis(model.vars, np.diag([1.0]), blocked=True) trace = pm.sample(100, step=step) summary(trace) @@ -234,15 +247,14 @@ def test_groupby_leading_idxs_2d_variable(self): def test_groupby_leading_idxs_3d_variable(self): result = {k: list(v) for k, v in pm.stats._groupby_leading_idxs((2, 3, 2))} - expected_keys = [(0, 0), (0, 1), (0, 2), - (1, 0), (1, 1), (1, 2)] + expected_keys = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)] keys = list(result.keys()) assert len(keys) == len(expected_keys) for key in keys: assert result[key] == [key + (0,), key + (1,)] def test_bfmi(self): - trace = {'energy': np.array([1, 2, 3, 4])} + trace = {"energy": np.array([1, 2, 3, 4])} assert_almost_equal(bfmi(trace), 0.8) @@ -250,47 +262,61 @@ def test_r2_score(self): x = np.linspace(0, 1, 100) y = np.random.normal(x, 1) res = st.linregress(x, y) - assert_almost_equal(res.rvalue ** 2, - r2_score(y, res.intercept + - res.slope * x).r2_median, - 2) + assert_almost_equal( + res.rvalue ** 2, r2_score(y, res.intercept + res.slope * x).r2_median, 2 + ) + class TestDfSummary(bf.ModelBackendSampledTestCase): backend = ndarray.NDArray - name = 'text-db' + name = "text-db" shape = (2, 3) def test_column_names(self): ds = summary(self.mtrace, batches=3) - npt.assert_equal(np.array(['mean', 'sd', 'mc_error', - 'hpd_2.5', 'hpd_97.5', - 'n_eff', 'Rhat']), - ds.columns) + npt.assert_equal( + np.array( + ["mean", "sd", "mc_error", "hpd_2.5", "hpd_97.5", "n_eff", "Rhat"] + ), + ds.columns, + ) def test_column_names_decimal_hpd(self): ds = summary(self.mtrace, batches=3, alpha=0.001) - npt.assert_equal(np.array(['mean', 'sd', 'mc_error', - 'hpd_0.05', 'hpd_99.95', - 'n_eff', 'Rhat']), - ds.columns) + npt.assert_equal( + np.array( + ["mean", "sd", "mc_error", "hpd_0.05", "hpd_99.95", "n_eff", "Rhat"] + ), + ds.columns, + ) def test_column_names_custom_function(self): def customf(x): - return pd.Series(np.mean(x, 0), name='my_mean') + return pd.Series(np.mean(x, 0), name="my_mean") ds = summary(self.mtrace, batches=3, stat_funcs=[customf]) - npt.assert_equal(np.array(['my_mean']), ds.columns) + npt.assert_equal(np.array(["my_mean"]), ds.columns) def test_column_names_custom_function_extend(self): def customf(x): - return pd.Series(np.mean(x, 0), name='my_mean') - - ds = summary(self.mtrace, batches=3, - stat_funcs=[customf], extend=True) - npt.assert_equal(np.array(['mean', 'sd', 'mc_error', - 'hpd_2.5', 'hpd_97.5', 'my_mean', - 'n_eff', 'Rhat']), - ds.columns) + return pd.Series(np.mean(x, 0), name="my_mean") + + ds = summary(self.mtrace, batches=3, stat_funcs=[customf], extend=True) + npt.assert_equal( + np.array( + [ + "mean", + "sd", + "mc_error", + "hpd_2.5", + "hpd_97.5", + "my_mean", + "n_eff", + "Rhat", + ] + ), + ds.columns, + ) def test_value_alignment(self): mtrace = self.mtrace @@ -299,47 +325,52 @@ def test_value_alignment(self): result = mtrace[var].mean(0) for idx, val in np.ndenumerate(result): if idx: - vidx = var + '__' + '_'.join([str(i) for i in idx]) + vidx = var + "__" + "_".join([str(i) for i in idx]) else: vidx = var - npt.assert_equal(val, ds.loc[vidx, 'mean']) + npt.assert_equal(val, ds.loc[vidx, "mean"]) def test_row_names(self): with Model(): - pm.Uniform('x', 0, 1) + pm.Uniform("x", 0, 1) step = Metropolis() trace = pm.sample(100, step=step) ds = summary(trace, batches=3, include_transformed=True) - npt.assert_equal(np.array(['x_interval__', 'x']), - ds.index) + npt.assert_equal(np.array(["x_interval__", "x"]), ds.index) def test_value_n_eff_rhat(self): mu = -2.1 tau = 1.3 with Model(): - Normal('x0', mu, tau, testval=floatX_array(.1)) # 0d - Normal('x1', mu, tau, shape=2, testval=floatX_array([.1, .1]))# 1d - Normal('x2', mu, tau, shape=(2, 2), - testval=floatX_array(np.tile(.1, (2, 2))))# 2d - Normal('x3', mu, tau, shape=(2, 2, 3), - testval=floatX_array(np.tile(.1, (2, 2, 3))))# 3d + Normal("x0", mu, tau, testval=floatX_array(0.1)) # 0d + Normal("x1", mu, tau, shape=2, testval=floatX_array([0.1, 0.1])) # 1d + Normal( + "x2", mu, tau, shape=(2, 2), testval=floatX_array(np.tile(0.1, (2, 2))) + ) # 2d + Normal( + "x3", + mu, + tau, + shape=(2, 2, 3), + testval=floatX_array(np.tile(0.1, (2, 2, 3))), + ) # 3d trace = pm.sample(100, step=pm.Metropolis()) for varname in trace.varnames: # test effective_n value n_eff = pm.effective_n(trace, varnames=[varname])[varname] n_eff_df = np.asarray( - pm.summary(trace, varnames=[varname])['n_eff'] - ).reshape(n_eff.shape) + pm.summary(trace, varnames=[varname])["n_eff"] + ).reshape(n_eff.shape) npt.assert_equal(n_eff, n_eff_df) - + # test Rhat value rhat = pm.gelman_rubin(trace, varnames=[varname])[varname] - rhat_df = np.asarray( - pm.summary(trace, varnames=[varname])['Rhat'] - ).reshape(rhat.shape) + rhat_df = np.asarray(pm.summary(trace, varnames=[varname])["Rhat"]).reshape( + rhat.shape + ) npt.assert_equal(rhat, rhat_df) def test_psis(self): lw = np.random.randn(20000, 10) - _, ks = pm.stats._psislw(lw, 1.) - npt.assert_array_less(ks, .5) + _, ks = pm.stats._psislw(lw, 1.0) + npt.assert_array_less(ks, 0.5) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index ca67369c5b..a6f591bfeb 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -2,17 +2,38 @@ import tempfile from .checks import close_to -from .models import (simple_categorical, mv_simple, mv_simple_discrete, - mv_prior_simple, simple_2model_continuous) +from .models import ( + simple_categorical, + mv_simple, + mv_simple_discrete, + mv_prior_simple, + simple_2model_continuous, +) from pymc3.sampling import assign_step_methods, sample from pymc3.model import Model -from pymc3.step_methods import (NUTS, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, - Metropolis, Slice, CompoundStep, NormalProposal, - MultivariateNormalProposal, HamiltonianMC, - EllipticalSlice, SMC, DEMetropolis) +from pymc3.step_methods import ( + NUTS, + BinaryGibbsMetropolis, + CategoricalGibbsMetropolis, + Metropolis, + Slice, + CompoundStep, + NormalProposal, + MultivariateNormalProposal, + HamiltonianMC, + EllipticalSlice, + SMC, + DEMetropolis, +) from pymc3.theanof import floatX from pymc3.distributions import ( - Binomial, Normal, Bernoulli, Categorical, Beta, HalfNormal) + Binomial, + Normal, + Bernoulli, + Categorical, + Beta, + HalfNormal, +) from numpy.testing import assert_array_almost_equal import numpy as np @@ -25,136 +46,626 @@ class TestStepMethods(object): # yield test doesn't work subclassing object master_samples = { - Slice: np.array([ 0.10233528, 0.40458486, 0.17329217, 0.46281232, 0.22556278, - 1.52632836, -0.27823807, 0.02539625, 1.02711735, 0.03686346, - -0.62841281, -0.27125083, 0.31989505, 0.84031155, -0.18949138, - 1.60550262, 1.01375291, -0.29742941, 0.35312738, 0.43363622, - 1.18898078, 0.80063888, 0.38445644, 0.90184395, 1.69150017, - 2.05452171, -0.13334755, 1.61265408, 1.36579345, 1.3216292 , - -0.59487037, -0.34648927, 1.05107285, 0.42870305, 0.61552257, - 0.55239884, 0.13929271, 0.26213809, -0.2316028 , 0.19711046, - 1.42832629, 1.93641434, -0.81142379, -0.31059485, -0.3189694 , - 1.43542534, 0.40311093, 1.63103768, 0.24034874, 0.33924866, - 0.94951616, 0.71700185, 0.79273056, -0.44569146, 1.91974783, - 0.84673795, 1.12411833, -0.83123811, -0.54310095, -0.00721347, - 0.9925055 , 1.04015058, -0.34958074, -0.14926302, -0.47990225, - -0.75629446, -0.95942067, 1.68179204, 1.20598073, 1.39675733, - 1.22755935, 0.06728757, 1.05184231, 1.01126791, -0.67327093, - 0.21429651, 1.33730461, -1.56174184, -0.64348764, 0.98050636, - 0.25923049, 0.58622631, 0.46589069, 1.44367347, -0.43141573, - 1.08293374, -0.5563204 , 1.46287904, 1.26019815, 0.52972104, - 1.08792687, 1.10064358, 1.84881549, 0.91179647, 0.69316592, - -0.47657064, 2.22747063, 0.83388935, 0.84680716, -0.10556406]), - HamiltonianMC: np.array([ 0.43733634, 0.43733634, 0.15955614, -0.44355329, 0.21465731, - 0.30148244, 0.45527282, 0.45527282, 0.41753005, -0.03480236, - 1.16599611, 0.565306 , 0.565306 , 0.0077143 , -0.18291321, - -0.14577946, -0.00703353, -0.00703353, 0.14345194, -0.12345058, - 0.76875516, 0.76875516, 0.84289506, 0.24596225, 0.95287087, - 1.3799335 , 1.1493899 , 1.1493899 , 2.0255982 , -0.77850273, - 0.11604115, 0.11604115, 0.39296557, 0.34826491, 0.5951183 , - 0.63097341, 0.57938784, 0.57938784, 0.76570029, 0.63516046, - 0.23667784, 2.0151377 , 1.92064966, 1.09125654, -0.43716787, - 0.61939595, 0.30566853, 0.30566853, 0.3690641 , 0.3690641 , - 0.3690641 , 1.26497542, 0.90890334, 0.01482818, 0.01482818, - -0.15542473, 0.26475651, 0.32687263, 1.21902207, 0.6708017 , - -0.18867695, -0.18867695, -0.07141329, -0.04631175, -0.16855462, - -0.16855462, 1.05455573, 0.47371825, 0.47371825, 0.86307077, - 0.86307077, 0.51484125, 1.0022533 , 1.0022533 , 1.02370316, - 0.71331829, 0.71331829, 0.71331829, 0.40758664, 0.81307434, - -0.46269741, -0.60284666, 0.06710527, 0.06710527, -0.35055053, - 0.36727629, 0.36727629, 0.69350367, 0.11268647, 0.37681301, - 1.10168386, 0.49559472, 0.49559472, 0.06193658, -0.07947103, - 0.01969434, 1.28470893, -0.13536813, -0.13536813, 0.6575966 ]), - Metropolis: np.array([ 1.62434536, 1.01258895, 0.4844172 , 0.4844172 , 0.4844172 , - 0.4844172 , 0.4844172 , 0.4844172 , 0.4844172 , 0.4844172 , - 0.31198899, 0.31198899, 0.31198899, 0.31198899, 1.21284494, - 0.52911708, 0.261229 , 0.79158447, 0.10441177, -0.74079387, - -0.74079387, -0.50637818, -0.50637818, -0.50637818, -0.45557042, - -0.45557042, -0.33541147, 0.28179164, 0.58196196, 0.22971211, - 0.02081788, 0.60744107, 0.8930284 , 0.8930284 , 1.40595822, - 1.10786538, 1.10786538, 1.10786538, 1.10786538, -0.28863095, - -0.12859388, 0.74757504, 0.74757504, 0.74757504, 0.97766977, - 0.97766977, 0.75534163, 0.55458356, 0.75288328, 0.87189193, - 0.9937132 , 0.9937132 , 0.61842825, 0.61842825, 0.27457457, - 0.31817143, 0.31817143, 0.31817143, -0.77674042, -0.60735798, - 0.13319847, -0.82050213, -0.82050213, -0.50534274, -0.15479676, - -0.15479676, -0.19349227, -0.19349227, -0.21810923, -0.21810923, - -0.21810923, 1.0180548 , -0.18121323, 0.68213209, 0.68213209, - 1.23266958, 1.23266958, 0.60913885, 1.41099989, 1.45756718, - 1.45756718, 1.45756718, 1.45756718, 1.59526839, 1.82776295, - 1.82776295, 1.82776295, 1.82776295, 2.2691274 , 2.16897216, - 2.18638157, 1.06436284, 0.54726838, 0.54726838, 1.04247971, - 0.86777655, 0.86777655, 0.86777655, 0.86777655, 0.61914177]), - NUTS: np.array([ 0.550575 , 0.550575 , 0.80046332, 0.91590059, 1.34621916, - 1.34621916, -0.63917773, -0.65770809, -0.65770809, -0.64512868, - -1.05448153, -0.5225666 , 0.14335153, -0.0034499 , -0.0034499 , - 0.05309212, -0.53186371, 0.29325825, 0.43210854, 0.56284837, - 0.56284837, 0.38041767, 0.47322034, 0.49937368, 0.49937368, - 0.44424258, 0.44424258, -0.02790848, -0.40470145, -0.35725567, - -0.43744228, 0.41955432, 0.31099421, 0.31099421, 0.65811717, - 0.66649398, 0.38493786, 0.54114658, 0.54114658, 0.68222408, - 0.66404942, 1.44143108, 1.15638799, -0.06775775, -0.06775775, - 0.30418561, 0.23543403, 0.57934404, -0.5435111 , -0.47938915, - -0.23816662, 0.36793792, 0.36793792, 0.64980016, 0.52150456, - 0.64643321, 0.26130179, 1.10569077, 1.10569077, 1.23662797, - -0.36928735, -0.14303069, 0.85298904, 0.85298904, 0.31422085, - 0.32113762, 0.32113762, 1.0692238 , 1.0692238 , 1.60127576, - 1.49249738, 1.09065107, 0.84264371, 0.84264371, -0.08832343, - 0.04868027, -0.02679449, -0.02679449, 0.91989101, 0.65754478, - -0.39220625, 0.08379492, 1.03055634, 1.03055634, 1.71071332, - 1.58740483, 1.67905741, 0.77744868, 0.15050587, 0.15050587, - 0.73979127, 0.15445515, 0.13134717, 0.85068974, 0.85068974, - 0.6974799 , 0.16170472, 0.86405959, 0.86405959, -0.22032854]), - SMC: np.array([ 5.10950205e-02, 1.09811720e+00, 1.78330202e-01, 6.85938766e-01, - 1.42354476e-01, -1.59630758e+00, 1.57176810e+00, -4.01398917e-01, - 1.14567871e+00, 1.14954938e+00, 4.94399840e-01, 1.16253017e+00, - 1.17432244e+00, 7.79195162e-01, 1.29017945e+00, 2.53722905e-01, - 5.38589898e-01, 3.52121216e-01, 1.35795966e+00, 1.02086933e-01, - 1.58845251e+00, 6.76852927e-01, -1.04716592e-02, -1.01613324e-01, - 1.37680965e+00, 7.40036542e-01, 2.89069320e-01, 1.48153741e+00, - 9.58156958e-01, 5.73623782e-02, 7.68850721e-01, 3.68643390e-01, - 1.47645964e+00, 2.32596780e-01, -1.85008158e-01, 3.71335958e-01, - 2.68600102e+00, -4.89504443e-01, 6.54265561e-02, 3.80455349e-01, - 1.17875338e+00, 2.30233324e-01, 6.90960231e-01, 8.81668685e-01, - -2.19754340e-01, 1.27686862e-01, 3.28444250e-01, 1.34820635e-01, - 5.29725257e-01, 1.43783915e+00, -1.64754264e-01, 7.41446719e-01, - -1.17733186e+00, 6.01215658e-02, 1.82638158e-01, -2.23232214e-02, - -1.79877583e-02, 8.37949150e-01, 4.41964955e-01, -8.66524743e-01, - 4.90738093e-01, 2.42056488e-01, 4.67699626e-01, 2.91075351e-01, - 1.49541153e+00, 8.30730845e-01, 1.03956404e+00, -5.16162910e-01, - 2.84338859e-01, 1.72305888e+00, 9.52445566e-01, 1.48831718e+00, - 8.03455325e-01, 1.48840970e+00, 6.98122664e-01, 3.30187139e-01, - 7.88029712e-01, 9.31510828e-01, 1.01326878e+00, 2.26637755e-01, - 1.70703646e-01, -8.54429841e-01, 2.97254590e-01, -2.77843274e-01, - -2.25544207e-01, 1.98862826e-02, 5.05953885e-01, 4.98203941e-01, - 1.20897382e+00, -6.32958669e-05, -7.22425896e-01, 1.60930869e+00, - -5.02773645e-01, 2.46405678e+00, 9.16039706e-01, 1.14146060e+00, - -1.95781984e-01, -2.44653942e-01, 2.67851290e-01, 2.37462012e-01, - 6.71471950e-01, 1.18319765e+00, 1.29146530e+00, -3.14177753e-01, - -1.31041215e-02, 1.05029405e+00, 1.31202399e+00, 7.40532839e-02, - 9.15510041e-01, 7.71054604e-01, 9.83483263e-01, 9.03032142e-01, - 9.14191160e-01, 9.32285366e-01, 1.13937607e+00, -4.29155928e-01, - 3.44609229e-02, -5.46423555e-02, 1.34625982e+00, -1.28287047e-01, - -1.55214879e-02, 3.25294234e-01, 1.06120585e+00, -5.09891282e-01, - 1.25789335e+00, 1.01808348e+00, -9.92590713e-01, 1.72832932e+00, - 1.12232980e+00, 8.54801892e-01, 1.41534752e+00, 3.50798405e-01, - 3.69381623e-01, 1.48608411e+00, -1.15506310e-02, 1.57066360e+00, - 2.00747378e-01, 4.47219763e-01, 5.57720524e-01, -7.74295353e-02, - 1.79192501e+00, 7.66510475e-01, 1.38852488e+00, -4.06055122e-01, - 2.73203156e-01, 3.61014687e-01, 1.23574043e+00, 1.64565746e-01, - -9.89896480e-02, 9.26130265e-02, 1.06440134e+00, -1.55890408e-01, - 4.47131846e-01, -7.59186008e-01, -1.50881256e+00, -2.13928005e-01, - -4.19160151e-01, 1.75815544e+00, 7.45423008e-01, 6.94781506e-01, - 1.58596346e+00, 1.75508724e+00, 4.56070434e-01, 2.94128709e-02, - 1.17703970e+00, -9.90230827e-02, 8.42796845e-01, 1.79154944e+00, - 5.92779197e-01, 2.73562285e-01, 1.61597907e+00, 1.23514403e+00, - 4.86261080e-01, -3.10434934e-01, 5.57873722e-01, 6.50365217e-01, - -3.41009850e-01, 9.26851109e-01, 8.28936486e-01, 9.16180689e-02, - 1.30226405e+00, 3.73945789e-01, 6.04560122e-02, 6.00698708e-01, - 9.68764731e-02, 1.41904148e+00, 6.94182961e-03, 3.17504138e-01, - 5.90956041e-01, -5.78113887e-01, 5.26615565e-01, -4.19715252e-01, - 8.92891364e-01, 1.30207363e-01, 4.19899637e-01, 7.10275704e-01, - 9.27418179e-02, 1.85758044e+00, 4.76988907e-01, -1.36341398e-01]), + Slice: np.array( + [ + 0.10233528, + 0.40458486, + 0.17329217, + 0.46281232, + 0.22556278, + 1.52632836, + -0.27823807, + 0.02539625, + 1.02711735, + 0.03686346, + -0.62841281, + -0.27125083, + 0.31989505, + 0.84031155, + -0.18949138, + 1.60550262, + 1.01375291, + -0.29742941, + 0.35312738, + 0.43363622, + 1.18898078, + 0.80063888, + 0.38445644, + 0.90184395, + 1.69150017, + 2.05452171, + -0.13334755, + 1.61265408, + 1.36579345, + 1.3216292, + -0.59487037, + -0.34648927, + 1.05107285, + 0.42870305, + 0.61552257, + 0.55239884, + 0.13929271, + 0.26213809, + -0.2316028, + 0.19711046, + 1.42832629, + 1.93641434, + -0.81142379, + -0.31059485, + -0.3189694, + 1.43542534, + 0.40311093, + 1.63103768, + 0.24034874, + 0.33924866, + 0.94951616, + 0.71700185, + 0.79273056, + -0.44569146, + 1.91974783, + 0.84673795, + 1.12411833, + -0.83123811, + -0.54310095, + -0.00721347, + 0.9925055, + 1.04015058, + -0.34958074, + -0.14926302, + -0.47990225, + -0.75629446, + -0.95942067, + 1.68179204, + 1.20598073, + 1.39675733, + 1.22755935, + 0.06728757, + 1.05184231, + 1.01126791, + -0.67327093, + 0.21429651, + 1.33730461, + -1.56174184, + -0.64348764, + 0.98050636, + 0.25923049, + 0.58622631, + 0.46589069, + 1.44367347, + -0.43141573, + 1.08293374, + -0.5563204, + 1.46287904, + 1.26019815, + 0.52972104, + 1.08792687, + 1.10064358, + 1.84881549, + 0.91179647, + 0.69316592, + -0.47657064, + 2.22747063, + 0.83388935, + 0.84680716, + -0.10556406, + ] + ), + HamiltonianMC: np.array( + [ + 0.43733634, + 0.43733634, + 0.15955614, + -0.44355329, + 0.21465731, + 0.30148244, + 0.45527282, + 0.45527282, + 0.41753005, + -0.03480236, + 1.16599611, + 0.565306, + 0.565306, + 0.0077143, + -0.18291321, + -0.14577946, + -0.00703353, + -0.00703353, + 0.14345194, + -0.12345058, + 0.76875516, + 0.76875516, + 0.84289506, + 0.24596225, + 0.95287087, + 1.3799335, + 1.1493899, + 1.1493899, + 2.0255982, + -0.77850273, + 0.11604115, + 0.11604115, + 0.39296557, + 0.34826491, + 0.5951183, + 0.63097341, + 0.57938784, + 0.57938784, + 0.76570029, + 0.63516046, + 0.23667784, + 2.0151377, + 1.92064966, + 1.09125654, + -0.43716787, + 0.61939595, + 0.30566853, + 0.30566853, + 0.3690641, + 0.3690641, + 0.3690641, + 1.26497542, + 0.90890334, + 0.01482818, + 0.01482818, + -0.15542473, + 0.26475651, + 0.32687263, + 1.21902207, + 0.6708017, + -0.18867695, + -0.18867695, + -0.07141329, + -0.04631175, + -0.16855462, + -0.16855462, + 1.05455573, + 0.47371825, + 0.47371825, + 0.86307077, + 0.86307077, + 0.51484125, + 1.0022533, + 1.0022533, + 1.02370316, + 0.71331829, + 0.71331829, + 0.71331829, + 0.40758664, + 0.81307434, + -0.46269741, + -0.60284666, + 0.06710527, + 0.06710527, + -0.35055053, + 0.36727629, + 0.36727629, + 0.69350367, + 0.11268647, + 0.37681301, + 1.10168386, + 0.49559472, + 0.49559472, + 0.06193658, + -0.07947103, + 0.01969434, + 1.28470893, + -0.13536813, + -0.13536813, + 0.6575966, + ] + ), + Metropolis: np.array( + [ + 1.62434536, + 1.01258895, + 0.4844172, + 0.4844172, + 0.4844172, + 0.4844172, + 0.4844172, + 0.4844172, + 0.4844172, + 0.4844172, + 0.31198899, + 0.31198899, + 0.31198899, + 0.31198899, + 1.21284494, + 0.52911708, + 0.261229, + 0.79158447, + 0.10441177, + -0.74079387, + -0.74079387, + -0.50637818, + -0.50637818, + -0.50637818, + -0.45557042, + -0.45557042, + -0.33541147, + 0.28179164, + 0.58196196, + 0.22971211, + 0.02081788, + 0.60744107, + 0.8930284, + 0.8930284, + 1.40595822, + 1.10786538, + 1.10786538, + 1.10786538, + 1.10786538, + -0.28863095, + -0.12859388, + 0.74757504, + 0.74757504, + 0.74757504, + 0.97766977, + 0.97766977, + 0.75534163, + 0.55458356, + 0.75288328, + 0.87189193, + 0.9937132, + 0.9937132, + 0.61842825, + 0.61842825, + 0.27457457, + 0.31817143, + 0.31817143, + 0.31817143, + -0.77674042, + -0.60735798, + 0.13319847, + -0.82050213, + -0.82050213, + -0.50534274, + -0.15479676, + -0.15479676, + -0.19349227, + -0.19349227, + -0.21810923, + -0.21810923, + -0.21810923, + 1.0180548, + -0.18121323, + 0.68213209, + 0.68213209, + 1.23266958, + 1.23266958, + 0.60913885, + 1.41099989, + 1.45756718, + 1.45756718, + 1.45756718, + 1.45756718, + 1.59526839, + 1.82776295, + 1.82776295, + 1.82776295, + 1.82776295, + 2.2691274, + 2.16897216, + 2.18638157, + 1.06436284, + 0.54726838, + 0.54726838, + 1.04247971, + 0.86777655, + 0.86777655, + 0.86777655, + 0.86777655, + 0.61914177, + ] + ), + NUTS: np.array( + [ + 0.550575, + 0.550575, + 0.80046332, + 0.91590059, + 1.34621916, + 1.34621916, + -0.63917773, + -0.65770809, + -0.65770809, + -0.64512868, + -1.05448153, + -0.5225666, + 0.14335153, + -0.0034499, + -0.0034499, + 0.05309212, + -0.53186371, + 0.29325825, + 0.43210854, + 0.56284837, + 0.56284837, + 0.38041767, + 0.47322034, + 0.49937368, + 0.49937368, + 0.44424258, + 0.44424258, + -0.02790848, + -0.40470145, + -0.35725567, + -0.43744228, + 0.41955432, + 0.31099421, + 0.31099421, + 0.65811717, + 0.66649398, + 0.38493786, + 0.54114658, + 0.54114658, + 0.68222408, + 0.66404942, + 1.44143108, + 1.15638799, + -0.06775775, + -0.06775775, + 0.30418561, + 0.23543403, + 0.57934404, + -0.5435111, + -0.47938915, + -0.23816662, + 0.36793792, + 0.36793792, + 0.64980016, + 0.52150456, + 0.64643321, + 0.26130179, + 1.10569077, + 1.10569077, + 1.23662797, + -0.36928735, + -0.14303069, + 0.85298904, + 0.85298904, + 0.31422085, + 0.32113762, + 0.32113762, + 1.0692238, + 1.0692238, + 1.60127576, + 1.49249738, + 1.09065107, + 0.84264371, + 0.84264371, + -0.08832343, + 0.04868027, + -0.02679449, + -0.02679449, + 0.91989101, + 0.65754478, + -0.39220625, + 0.08379492, + 1.03055634, + 1.03055634, + 1.71071332, + 1.58740483, + 1.67905741, + 0.77744868, + 0.15050587, + 0.15050587, + 0.73979127, + 0.15445515, + 0.13134717, + 0.85068974, + 0.85068974, + 0.6974799, + 0.16170472, + 0.86405959, + 0.86405959, + -0.22032854, + ] + ), + SMC: np.array( + [ + 5.10950205e-02, + 1.09811720e00, + 1.78330202e-01, + 6.85938766e-01, + 1.42354476e-01, + -1.59630758e00, + 1.57176810e00, + -4.01398917e-01, + 1.14567871e00, + 1.14954938e00, + 4.94399840e-01, + 1.16253017e00, + 1.17432244e00, + 7.79195162e-01, + 1.29017945e00, + 2.53722905e-01, + 5.38589898e-01, + 3.52121216e-01, + 1.35795966e00, + 1.02086933e-01, + 1.58845251e00, + 6.76852927e-01, + -1.04716592e-02, + -1.01613324e-01, + 1.37680965e00, + 7.40036542e-01, + 2.89069320e-01, + 1.48153741e00, + 9.58156958e-01, + 5.73623782e-02, + 7.68850721e-01, + 3.68643390e-01, + 1.47645964e00, + 2.32596780e-01, + -1.85008158e-01, + 3.71335958e-01, + 2.68600102e00, + -4.89504443e-01, + 6.54265561e-02, + 3.80455349e-01, + 1.17875338e00, + 2.30233324e-01, + 6.90960231e-01, + 8.81668685e-01, + -2.19754340e-01, + 1.27686862e-01, + 3.28444250e-01, + 1.34820635e-01, + 5.29725257e-01, + 1.43783915e00, + -1.64754264e-01, + 7.41446719e-01, + -1.17733186e00, + 6.01215658e-02, + 1.82638158e-01, + -2.23232214e-02, + -1.79877583e-02, + 8.37949150e-01, + 4.41964955e-01, + -8.66524743e-01, + 4.90738093e-01, + 2.42056488e-01, + 4.67699626e-01, + 2.91075351e-01, + 1.49541153e00, + 8.30730845e-01, + 1.03956404e00, + -5.16162910e-01, + 2.84338859e-01, + 1.72305888e00, + 9.52445566e-01, + 1.48831718e00, + 8.03455325e-01, + 1.48840970e00, + 6.98122664e-01, + 3.30187139e-01, + 7.88029712e-01, + 9.31510828e-01, + 1.01326878e00, + 2.26637755e-01, + 1.70703646e-01, + -8.54429841e-01, + 2.97254590e-01, + -2.77843274e-01, + -2.25544207e-01, + 1.98862826e-02, + 5.05953885e-01, + 4.98203941e-01, + 1.20897382e00, + -6.32958669e-05, + -7.22425896e-01, + 1.60930869e00, + -5.02773645e-01, + 2.46405678e00, + 9.16039706e-01, + 1.14146060e00, + -1.95781984e-01, + -2.44653942e-01, + 2.67851290e-01, + 2.37462012e-01, + 6.71471950e-01, + 1.18319765e00, + 1.29146530e00, + -3.14177753e-01, + -1.31041215e-02, + 1.05029405e00, + 1.31202399e00, + 7.40532839e-02, + 9.15510041e-01, + 7.71054604e-01, + 9.83483263e-01, + 9.03032142e-01, + 9.14191160e-01, + 9.32285366e-01, + 1.13937607e00, + -4.29155928e-01, + 3.44609229e-02, + -5.46423555e-02, + 1.34625982e00, + -1.28287047e-01, + -1.55214879e-02, + 3.25294234e-01, + 1.06120585e00, + -5.09891282e-01, + 1.25789335e00, + 1.01808348e00, + -9.92590713e-01, + 1.72832932e00, + 1.12232980e00, + 8.54801892e-01, + 1.41534752e00, + 3.50798405e-01, + 3.69381623e-01, + 1.48608411e00, + -1.15506310e-02, + 1.57066360e00, + 2.00747378e-01, + 4.47219763e-01, + 5.57720524e-01, + -7.74295353e-02, + 1.79192501e00, + 7.66510475e-01, + 1.38852488e00, + -4.06055122e-01, + 2.73203156e-01, + 3.61014687e-01, + 1.23574043e00, + 1.64565746e-01, + -9.89896480e-02, + 9.26130265e-02, + 1.06440134e00, + -1.55890408e-01, + 4.47131846e-01, + -7.59186008e-01, + -1.50881256e00, + -2.13928005e-01, + -4.19160151e-01, + 1.75815544e00, + 7.45423008e-01, + 6.94781506e-01, + 1.58596346e00, + 1.75508724e00, + 4.56070434e-01, + 2.94128709e-02, + 1.17703970e00, + -9.90230827e-02, + 8.42796845e-01, + 1.79154944e00, + 5.92779197e-01, + 2.73562285e-01, + 1.61597907e00, + 1.23514403e00, + 4.86261080e-01, + -3.10434934e-01, + 5.57873722e-01, + 6.50365217e-01, + -3.41009850e-01, + 9.26851109e-01, + 8.28936486e-01, + 9.16180689e-02, + 1.30226405e00, + 3.73945789e-01, + 6.04560122e-02, + 6.00698708e-01, + 9.68764731e-02, + 1.41904148e00, + 6.94182961e-03, + 3.17504138e-01, + 5.90956041e-01, + -5.78113887e-01, + 5.26615565e-01, + -4.19715252e-01, + 8.92891364e-01, + 1.30207363e-01, + 4.19899637e-01, + 7.10275704e-01, + 9.27418179e-02, + 1.85758044e00, + 4.76988907e-01, + -1.36341398e-01, + ] + ), } def setup_class(self): @@ -163,7 +674,9 @@ def setup_class(self): def teardown_class(self): shutil.rmtree(self.temp_dir) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_sample_exact(self): for step_method in self.master_samples: self.check_trace(step_method) @@ -186,27 +699,37 @@ def check_trace(self, step_method): """ n_steps = 100 with Model() as model: - x = Normal('x', mu=0, sd=1) - y = Normal('y', mu=x, sd=1, observed=1) - if step_method.__name__ == 'SMC': - trace = sample(draws=200, - random_seed=1, - progressbar=False, - step=step_method()) - elif step_method.__name__ == 'NUTS': + x = Normal("x", mu=0, sd=1) + y = Normal("y", mu=x, sd=1, observed=1) + if step_method.__name__ == "SMC": + trace = sample( + draws=200, random_seed=1, progressbar=False, step=step_method() + ) + elif step_method.__name__ == "NUTS": step = step_method(scaling=model.test_point) - trace = sample(0, tune=n_steps, - discard_tuned_samples=False, - step=step, random_seed=1, chains=1) + trace = sample( + 0, + tune=n_steps, + discard_tuned_samples=False, + step=step, + random_seed=1, + chains=1, + ) else: - trace = sample(0, tune=n_steps, - discard_tuned_samples=False, - step=step_method(), random_seed=1, chains=1) + trace = sample( + 0, + tune=n_steps, + discard_tuned_samples=False, + step=step_method(), + random_seed=1, + chains=1, + ) assert_array_almost_equal( - trace['x'], + trace["x"], self.master_samples[step_method], - decimal=select_by_precision(float64=6, float32=4)) + decimal=select_by_precision(float64=6, float32=4), + ) def check_stat(self, check, trace, name): for (var, stat, value, bound) in check: @@ -215,9 +738,8 @@ def check_stat(self, check, trace, name): def test_step_continuous(self): start, model, (mu, C) = mv_simple() - unc = np.diag(C) ** .5 - check = (('x', np.mean, mu, unc / 10.), - ('x', np.std, unc, unc / 10.)) + unc = np.diag(C) ** 0.5 + check = (("x", np.mean, mu, unc / 10.0), ("x", np.std, unc, unc / 10.0)) with model: steps = ( Slice(), @@ -227,59 +749,77 @@ def test_step_continuous(self): Slice(blocked=True), HamiltonianMC(scaling=C, is_cov=True), NUTS(scaling=C, is_cov=True), - CompoundStep([ - HamiltonianMC(scaling=C, is_cov=True), - HamiltonianMC(scaling=C, is_cov=True, blocked=False)]), + CompoundStep( + [ + HamiltonianMC(scaling=C, is_cov=True), + HamiltonianMC(scaling=C, is_cov=True, blocked=False), + ] + ), ) for step in steps: - trace = sample(0, tune=8000, chains=1, - discard_tuned_samples=False, step=step, - start=start, model=model, random_seed=1) + trace = sample( + 0, + tune=8000, + chains=1, + discard_tuned_samples=False, + step=step, + start=start, + model=model, + random_seed=1, + ) self.check_stat(check, trace, step.__class__.__name__) def test_step_discrete(self): if theano.config.floatX == "float32": return # Cannot use @skip because it only skips one iteration of the yield start, model, (mu, C) = mv_simple_discrete() - unc = np.diag(C) ** .5 - check = (('x', np.mean, mu, unc / 10.), - ('x', np.std, unc, unc / 10.)) + unc = np.diag(C) ** 0.5 + check = (("x", np.mean, mu, unc / 10.0), ("x", np.std, unc, unc / 10.0)) with model: - steps = ( - Metropolis(S=C, proposal_dist=MultivariateNormalProposal), - ) + steps = (Metropolis(S=C, proposal_dist=MultivariateNormalProposal),) for step in steps: - trace = sample(20000, tune=0, step=step, start=start, model=model, - random_seed=1, chains=1) + trace = sample( + 20000, + tune=0, + step=step, + start=start, + model=model, + random_seed=1, + chains=1, + ) self.check_stat(check, trace, step.__class__.__name__) def test_step_categorical(self): start, model, (mu, C) = simple_categorical() - unc = C ** .5 - check = (('x', np.mean, mu, unc / 10.), - ('x', np.std, unc, unc / 10.)) + unc = C ** 0.5 + check = (("x", np.mean, mu, unc / 10.0), ("x", np.std, unc, unc / 10.0)) with model: steps = ( - CategoricalGibbsMetropolis(model.x, proposal='uniform'), - CategoricalGibbsMetropolis(model.x, proposal='proportional'), + CategoricalGibbsMetropolis(model.x, proposal="uniform"), + CategoricalGibbsMetropolis(model.x, proposal="proportional"), ) for step in steps: - trace = sample(8000, tune=0, step=step, start=start, model=model, random_seed=1) + trace = sample( + 8000, tune=0, step=step, start=start, model=model, random_seed=1 + ) self.check_stat(check, trace, step.__class__.__name__) def test_step_elliptical_slice(self): start, model, (K, L, mu, std, noise) = mv_prior_simple() unc = noise ** 0.5 - check = (('x', np.mean, mu, unc / 10.), - ('x', np.std, std, unc / 10.)) + check = (("x", np.mean, mu, unc / 10.0), ("x", np.std, std, unc / 10.0)) with model: - steps = ( - EllipticalSlice(prior_cov=K), - EllipticalSlice(prior_chol=L), - ) + steps = (EllipticalSlice(prior_cov=K), EllipticalSlice(prior_chol=L)) for step in steps: - trace = sample(5000, tune=0, step=step, start=start, model=model, - random_seed=1, chains=1) + trace = sample( + 5000, + tune=0, + step=step, + start=start, + model=model, + random_seed=1, + chains=1, + ) self.check_stat(check, trace, step.__class__.__name__) @@ -309,8 +849,10 @@ def test_mv_proposal(self): class TestCompoundStep(object): samplers = (Metropolis, Slice, HamiltonianMC, NUTS, DEMetropolis) - @pytest.mark.skipif(theano.config.floatX == "float32", - reason="Test fails on 32 bit due to linalg issues") + @pytest.mark.skipif( + theano.config.floatX == "float32", + reason="Test fails on 32 bit due to linalg issues", + ) def test_non_blocked(self): """Test that samplers correctly create non-blocked compound steps.""" _, model = simple_2model_continuous() @@ -318,8 +860,10 @@ def test_non_blocked(self): for sampler in self.samplers: assert isinstance(sampler(blocked=False), CompoundStep) - @pytest.mark.skipif(theano.config.floatX == "float32", - reason="Test fails on 32 bit due to linalg issues") + @pytest.mark.skipif( + theano.config.floatX == "float32", + reason="Test fails on 32 bit due to linalg issues", + ) def test_blocked(self): _, model = simple_2model_continuous() with model: @@ -333,50 +877,53 @@ class TestAssignStepMethods(object): def test_bernoulli(self): """Test bernoulli distribution is assigned binary gibbs metropolis method""" with Model() as model: - Bernoulli('x', 0.5) + Bernoulli("x", 0.5) steps = assign_step_methods(model, []) assert isinstance(steps, BinaryGibbsMetropolis) def test_normal(self): """Test normal distribution is assigned NUTS method""" with Model() as model: - Normal('x', 0, 1) + Normal("x", 0, 1) steps = assign_step_methods(model, []) assert isinstance(steps, NUTS) def test_categorical(self): """Test categorical distribution is assigned categorical gibbs metropolis method""" with Model() as model: - Categorical('x', np.array([0.25, 0.75])) + Categorical("x", np.array([0.25, 0.75])) steps = assign_step_methods(model, []) assert isinstance(steps, BinaryGibbsMetropolis) with Model() as model: - Categorical('y', np.array([0.25, 0.70, 0.05])) + Categorical("y", np.array([0.25, 0.70, 0.05])) steps = assign_step_methods(model, []) assert isinstance(steps, CategoricalGibbsMetropolis) def test_binomial(self): """Test binomial distribution is assigned metropolis method.""" with Model() as model: - Binomial('x', 10, 0.5) + Binomial("x", 10, 0.5) steps = assign_step_methods(model, []) assert isinstance(steps, Metropolis) def test_normal_nograd_op(self): """Test normal distribution without an implemented gradient is assigned slice method""" with Model() as model: - x = Normal('x', 0, 1) + x = Normal("x", 0, 1) # a custom Theano Op that does not have a grad: is_64 = theano.config.floatX == "float64" itypes = [tt.dscalar] if is_64 else [tt.fscalar] otypes = [tt.dscalar] if is_64 else [tt.fscalar] + @theano.as_op(itypes, otypes) def kill_grad(x): return x data = np.random.normal(size=(100,)) - Normal("y", mu=kill_grad(x), sd=1, observed=data.astype(theano.config.floatX)) + Normal( + "y", mu=kill_grad(x), sd=1, observed=data.astype(theano.config.floatX) + ) steps = assign_step_methods(model, []) assert isinstance(steps, Slice) @@ -389,7 +936,7 @@ class TestPopulationSamplers(object): def test_checks_population_size(self): """Test that population samplers check the population size.""" with Model() as model: - n = Normal('n', mu=0, sd=1) + n = Normal("n", mu=0, sd=1) for stepper in TestPopulationSamplers.steppers: step = stepper() with pytest.raises(ValueError): @@ -399,83 +946,93 @@ def test_checks_population_size(self): def test_parallelized_chains_are_random(self): with Model() as model: - x = Normal('x', 0, 1) + x = Normal("x", 0, 1) for stepper in TestPopulationSamplers.steppers: step = stepper() - trace = sample(chains=4, draws=20, tune=0, step=DEMetropolis(), - parallelize=True) - samples = np.array(trace.get_values('x', combine=False))[:,5] + trace = sample( + chains=4, draws=20, tune=0, step=DEMetropolis(), parallelize=True + ) + samples = np.array(trace.get_values("x", combine=False))[:, 5] - assert len(set(samples)) == 4, 'Parallelized {} ' \ - 'chains are identical.'.format(stepper) + assert ( + len(set(samples)) == 4 + ), "Parallelized {} " "chains are identical.".format(stepper) pass -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestNutsCheckTrace(object): def test_multiple_samplers(self, caplog): with Model(): - prob = Beta('prob', alpha=5., beta=3.) - Binomial('outcome', n=1, p=prob) + prob = Beta("prob", alpha=5.0, beta=3.0) + Binomial("outcome", n=1, p=prob) caplog.clear() - sample(3, tune=2, discard_tuned_samples=False, - n_init=None, chains=1) + sample(3, tune=2, discard_tuned_samples=False, n_init=None, chains=1) messages = [msg.msg for msg in caplog.records] - assert all('boolean index did not' not in msg for msg in messages) + assert all("boolean index did not" not in msg for msg in messages) def test_bad_init(self): with Model(): - HalfNormal('a', sd=1, testval=-1, transform=None) + HalfNormal("a", sd=1, testval=-1, transform=None) with pytest.raises(ValueError) as error: sample(init=None) - error.match('Bad initial') + error.match("Bad initial") def test_linalg(self, caplog): with Model(): - a = Normal('a', shape=2) + a = Normal("a", shape=2) a = tt.switch(a > 0, np.inf, a) b = tt.slinalg.solve(floatX(np.eye(2)), a) - Normal('c', mu=b, shape=2) + Normal("c", mu=b, shape=2) caplog.clear() trace = sample(20, init=None, tune=5, chains=2) warns = [msg.msg for msg in caplog.records] - assert np.any(trace['diverging']) + assert np.any(trace["diverging"]) assert ( - any('divergence after tuning' in warn - for warn in warns) - or - any('divergences after tuning' in warn - for warn in warns) - or - any('only diverging samples' in warn - for warn in warns)) + any("divergence after tuning" in warn for warn in warns) + or any("divergences after tuning" in warn for warn in warns) + or any("only diverging samples" in warn for warn in warns) + ) with pytest.raises(ValueError) as error: trace.report.raise_ok() - error.match('issues during sampling') + error.match("issues during sampling") assert not trace.report.ok def test_sampler_stats(self): with Model() as model: - x = Normal('x', mu=0, sd=1) + x = Normal("x", mu=0, sd=1) trace = sample(draws=10, tune=1, chains=1) # Assert stats exist and have the correct shape. expected_stat_names = { - 'depth', 'diverging', 'energy', 'energy_error', 'model_logp', - 'max_energy_error', 'mean_tree_accept', 'step_size', - 'step_size_bar', 'tree_size', 'tune' + "depth", + "diverging", + "energy", + "energy_error", + "model_logp", + "max_energy_error", + "mean_tree_accept", + "step_size", + "step_size_bar", + "tree_size", + "tune", } - assert(trace.stat_names == expected_stat_names) + assert trace.stat_names == expected_stat_names for varname in trace.stat_names: - assert(trace.get_sampler_stats(varname).shape == (10,)) + assert trace.get_sampler_stats(varname).shape == (10,) # Assert model logp is computed correctly: computing post-sampling # and tracking while sampling should give same results. - model_logp_ = np.array([ - model.logp(trace.point(i, chain=c)) - for c in trace.chains for i in range(len(trace)) - ]) - assert((trace.model_logp == model_logp_).all()) + model_logp_ = np.array( + [ + model.logp(trace.point(i, chain=c)) + for c in trace.chains + for i in range(len(trace)) + ] + ) + assert (trace.model_logp == model_logp_).all() diff --git a/pymc3/tests/test_text_backend.py b/pymc3/tests/test_text_backend.py index f524d16605..d2dc867a8e 100644 --- a/pymc3/tests/test_text_backend.py +++ b/pymc3/tests/test_text_backend.py @@ -6,7 +6,7 @@ class TestTextSampling(object): - name = 'text-db' + name = "text-db" def test_supports_sampler_stats(self): with pm.Model(): @@ -20,53 +20,57 @@ def teardown_method(self): class TestText0dSampling(bf.SamplingTestCase): backend = text.Text - name = 'text-db' + name = "text-db" shape = () class TestText1dSampling(bf.SamplingTestCase): backend = text.Text - name = 'text-db' + name = "text-db" shape = 2 class TestText2dSampling(bf.SamplingTestCase): backend = text.Text - name = 'text-db' + name = "text-db" shape = (2, 3) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestText0dSelection(bf.SelectionTestCase): backend = text.Text - name = 'text-db' + name = "text-db" shape = () class TestText1dSelection(bf.SelectionTestCase): backend = text.Text - name = 'text-db' + name = "text-db" shape = 2 class TestText2dSelection(bf.SelectionTestCase): backend = text.Text - name = 'text-db' + name = "text-db" shape = (2, 3) class TestTextDumpLoad(bf.DumpLoadTestCase): backend = text.Text load_func = staticmethod(text.load) - name = 'text-db' + name = "text-db" shape = (2, 3) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) class TestTextDumpFunction(bf.BackendEqualityTestCase): backend0 = backend1 = ndarray.NDArray name0 = None - name1 = 'text-db' + name1 = "text-db" shape = (2, 3) @classmethod @@ -81,5 +85,5 @@ class TestNDArrayTextEquality(bf.BackendEqualityTestCase): backend0 = ndarray.NDArray name0 = None backend1 = text.Text - name1 = 'text-db' + name1 = "text-db" shape = (2, 3) diff --git a/pymc3/tests/test_theanof.py b/pymc3/tests/test_theanof.py index 4a681a2a00..62f4593534 100644 --- a/pymc3/tests/test_theanof.py +++ b/pymc3/tests/test_theanof.py @@ -9,20 +9,21 @@ class TestSetTheanoConfig(object): def test_invalid_key(self): with pytest.raises(ValueError) as e: - set_theano_conf({'bad_key': True}) - e.match('Unknown') + set_theano_conf({"bad_key": True}) + e.match("Unknown") def test_restore_when_bad_key(self): - with theano.configparser.change_flags(compute_test_value='off'): + with theano.configparser.change_flags(compute_test_value="off"): with pytest.raises(ValueError): conf = collections.OrderedDict( - [('compute_test_value', 'raise'), ('bad_key', True)]) + [("compute_test_value", "raise"), ("bad_key", True)] + ) set_theano_conf(conf) - assert theano.config.compute_test_value == 'off' + assert theano.config.compute_test_value == "off" def test_restore(self): - with theano.configparser.change_flags(compute_test_value='off'): - conf = set_theano_conf({'compute_test_value': 'raise'}) - assert conf == {'compute_test_value': 'off'} + with theano.configparser.change_flags(compute_test_value="off"): + conf = set_theano_conf({"compute_test_value": "raise"}) + assert conf == {"compute_test_value": "off"} conf = set_theano_conf(conf) - assert conf == {'compute_test_value': 'raise'} + assert conf == {"compute_test_value": "raise"} diff --git a/pymc3/tests/test_tracetab.py b/pymc3/tests/test_tracetab.py index 2d1380719d..872289d5f0 100644 --- a/pymc3/tests/test_tracetab.py +++ b/pymc3/tests/test_tracetab.py @@ -7,7 +7,7 @@ class TestTraceToDf(bf.ModelBackendSampledTestCase): backend = ndarray.NDArray - name = 'text-db' + name = "text-db" shape = (2, 3) def test_trace_to_dataframe(self): @@ -22,9 +22,9 @@ def test_trace_to_dataframe(self): # `shape`. if vararr.shape[1:] != self.shape: continue - npt.assert_equal(vararr[:, 0, 0], df[varname + '__0_0'].values) - npt.assert_equal(vararr[:, 1, 0], df[varname + '__1_0'].values) - npt.assert_equal(vararr[:, 1, 2], df[varname + '__1_2'].values) + npt.assert_equal(vararr[:, 0, 0], df[varname + "__0_0"].values) + npt.assert_equal(vararr[:, 1, 0], df[varname + "__1_0"].values) + npt.assert_equal(vararr[:, 1, 2], df[varname + "__1_2"].values) checked = True assert checked @@ -40,38 +40,37 @@ def test_trace_to_dataframe_chain_arg(self): # `shape`. if vararr.shape[1:] != self.shape: continue - npt.assert_equal(vararr[:, 0, 0], df[varname + '__0_0'].values) - npt.assert_equal(vararr[:, 1, 0], df[varname + '__1_0'].values) - npt.assert_equal(vararr[:, 1, 2], df[varname + '__1_2'].values) + npt.assert_equal(vararr[:, 0, 0], df[varname + "__0_0"].values) + npt.assert_equal(vararr[:, 1, 0], df[varname + "__1_0"].values) + npt.assert_equal(vararr[:, 1, 2], df[varname + "__1_2"].values) checked = True assert checked def test_create_flat_names_0d(): shape = () - result = ttab.create_flat_names('x', shape) - expected = ['x'] + result = ttab.create_flat_names("x", shape) + expected = ["x"] assert result == expected assert ttab._create_shape(result) == shape def test_create_flat_names_1d(): - shape = 2, - result = ttab.create_flat_names('x', shape) - expected = ['x__0', 'x__1'] + shape = (2,) + result = ttab.create_flat_names("x", shape) + expected = ["x__0", "x__1"] assert result == expected assert ttab._create_shape(result) == shape def test_create_flat_names_2d(): shape = 2, 3 - result = ttab.create_flat_names('x', shape) - expected = ['x__0_0', 'x__0_1', 'x__0_2', - 'x__1_0', 'x__1_1', 'x__1_2'] + result = ttab.create_flat_names("x", shape) + expected = ["x__0_0", "x__0_1", "x__0_2", "x__1_0", "x__1_1", "x__1_2"] assert result == expected assert ttab._create_shape(result) == shape def test_create_flat_names_3d(): shape = 2, 3, 4 - assert ttab._create_shape(ttab.create_flat_names('x', shape)) == shape + assert ttab._create_shape(ttab.create_flat_names("x", shape)) == shape diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py index 5b7dc58345..e525b3726d 100644 --- a/pymc3/tests/test_transforms.py +++ b/pymc3/tests/test_transforms.py @@ -6,20 +6,29 @@ import theano import theano.tensor as tt from .helpers import SeededTest -from .test_distributions import (Simplex, Rplusbig, Rminusbig, - Unit, R, Vector, MultiSimplex, - Circ, SortedVector, UnitSortedVector) +from .test_distributions import ( + Simplex, + Rplusbig, + Rminusbig, + Unit, + R, + Vector, + MultiSimplex, + Circ, + SortedVector, + UnitSortedVector, +) from .checks import close_to, close_to_logical from ..theanof import jacobian # some transforms (stick breaking) require additon of small slack in order to be numerically # stable. The minimal addable slack for float32 is higher thus we need to be less strict -tol = 1e-7 if theano.config.floatX == 'float64' else 1e-6 +tol = 1e-7 if theano.config.floatX == "float64" else 1e-6 def check_transform(transform, domain, constructor=tt.dscalar, test=0): - x = constructor('x') + x = constructor("x") x.tag.test_value = test # test forward and forward_val forward_f = theano.function([x], transform.forward(x)) @@ -35,18 +44,21 @@ def check_vector_transform(transform, domain): def get_values(transform, domain=R, constructor=tt.dscalar, test=0): - x = constructor('x') + x = constructor("x") x.tag.test_value = test f = theano.function([x], transform.backward(x)) return np.array([f(val) for val in domain.vals]) -def check_jacobian_det(transform, domain, - constructor=tt.dscalar, - test=0, - make_comparable=None, - elemwise=False): - y = constructor('y') +def check_jacobian_det( + transform, + domain, + constructor=tt.dscalar, + test=0, + make_comparable=None, + elemwise=False, +): + y = constructor("y") y.tag.test_value = test x = transform.backward(y) @@ -61,49 +73,52 @@ def check_jacobian_det(transform, domain, # ljd = log jacobian det actual_ljd = theano.function([y], jac) - computed_ljd = theano.function([y], tt.as_tensor_variable( - transform.jacobian_det(y)), on_unused_input='ignore') + computed_ljd = theano.function( + [y], tt.as_tensor_variable(transform.jacobian_det(y)), on_unused_input="ignore" + ) for yval in domain.vals: - close_to( - actual_ljd(yval), - computed_ljd(yval), tol) + close_to(actual_ljd(yval), computed_ljd(yval), tol) def test_simplex(): check_vector_transform(tr.stick_breaking, Simplex(2)) check_vector_transform(tr.stick_breaking, Simplex(4)) - check_transform(tr.stick_breaking, MultiSimplex( - 3, 2), constructor=tt.dmatrix, test=np.zeros((2, 2))) + check_transform( + tr.stick_breaking, + MultiSimplex(3, 2), + constructor=tt.dmatrix, + test=np.zeros((2, 2)), + ) def test_simplex_bounds(): - vals = get_values(tr.stick_breaking, Vector(R, 2), - tt.dvector, np.array([0, 0])) + vals = get_values(tr.stick_breaking, Vector(R, 2), tt.dvector, np.array([0, 0])) close_to(vals.sum(axis=1), 1, tol) close_to_logical(vals > 0, True, tol) close_to_logical(vals < 1, True, tol) - check_jacobian_det(tr.stick_breaking, Vector( - R, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1]) + check_jacobian_det( + tr.stick_breaking, Vector(R, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1] + ) def test_sum_to_1(): check_vector_transform(tr.sum_to_1, Simplex(2)) check_vector_transform(tr.sum_to_1, Simplex(4)) - check_jacobian_det(tr.sum_to_1, Vector(Unit, 2), - tt.dvector, np.array([0, 0]), lambda x: x[:-1]) + check_jacobian_det( + tr.sum_to_1, Vector(Unit, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1] + ) def test_log(): check_transform(tr.log, Rplusbig) check_jacobian_det(tr.log, Rplusbig, elemwise=True) - check_jacobian_det(tr.log, Vector(Rplusbig, 2), - tt.dvector, [0, 0], elemwise=True) + check_jacobian_det(tr.log, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True) vals = get_values(tr.log) close_to_logical(vals > 0, True, tol) @@ -113,8 +128,9 @@ def test_log_exp_m1(): check_transform(tr.log_exp_m1, Rplusbig) check_jacobian_det(tr.log_exp_m1, Rplusbig, elemwise=True) - check_jacobian_det(tr.log_exp_m1, Vector(Rplusbig, 2), - tt.dvector, [0, 0], elemwise=True) + check_jacobian_det( + tr.log_exp_m1, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True + ) vals = get_values(tr.log_exp_m1) close_to_logical(vals > 0, True, tol) @@ -124,8 +140,9 @@ def test_logodds(): check_transform(tr.logodds, Unit) check_jacobian_det(tr.logodds, Unit, elemwise=True) - check_jacobian_det(tr.logodds, Vector(Unit, 2), - tt.dvector, [.5, .5], elemwise=True) + check_jacobian_det( + tr.logodds, Vector(Unit, 2), tt.dvector, [0.5, 0.5], elemwise=True + ) vals = get_values(tr.logodds) close_to_logical(vals > 0, True, tol) @@ -137,8 +154,7 @@ def test_lowerbound(): check_transform(trans, Rplusbig) check_jacobian_det(trans, Rplusbig, elemwise=True) - check_jacobian_det(trans, Vector(Rplusbig, 2), - tt.dvector, [0, 0], elemwise=True) + check_jacobian_det(trans, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True) vals = get_values(trans) close_to_logical(vals > 0, True, tol) @@ -149,15 +165,14 @@ def test_upperbound(): check_transform(trans, Rminusbig) check_jacobian_det(trans, Rminusbig, elemwise=True) - check_jacobian_det(trans, Vector(Rminusbig, 2), - tt.dvector, [-1, -1], elemwise=True) + check_jacobian_det(trans, Vector(Rminusbig, 2), tt.dvector, [-1, -1], elemwise=True) vals = get_values(trans) close_to_logical(vals < 0, True, tol) def test_interval(): - for a, b in [(-4, 5.5), (.1, .7), (-10, 4.3)]: + for a, b in [(-4, 5.5), (0.1, 0.7), (-10, 4.3)]: domain = Unit * np.float64(b - a) + np.float64(a) trans = tr.interval(a, b) check_transform(trans, domain) @@ -185,24 +200,26 @@ def test_circular(): def test_ordered(): check_vector_transform(tr.ordered, SortedVector(6)) - check_jacobian_det(tr.ordered, Vector(R, 2), - tt.dvector, np.array([0, 0]), elemwise=False) + check_jacobian_det( + tr.ordered, Vector(R, 2), tt.dvector, np.array([0, 0]), elemwise=False + ) - vals = get_values(tr.ordered, Vector(R, 3), - tt.dvector, np.zeros(3)) + vals = get_values(tr.ordered, Vector(R, 3), tt.dvector, np.zeros(3)) close_to_logical(np.diff(vals) >= 0, True, tol) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" +) def test_chain(): chain_tranf = tr.Chain([tr.logodds, tr.ordered]) check_vector_transform(chain_tranf, UnitSortedVector(3)) - check_jacobian_det(chain_tranf, Vector(R, 4), - tt.dvector, np.zeros(4), elemwise=False) + check_jacobian_det( + chain_tranf, Vector(R, 4), tt.dvector, np.zeros(4), elemwise=False + ) - vals = get_values(chain_tranf, Vector(R, 5), - tt.dvector, np.zeros(5)) + vals = get_values(chain_tranf, Vector(R, 5), tt.dvector, np.zeros(5)) close_to_logical(np.diff(vals) >= 0, True, tol) @@ -211,7 +228,7 @@ def build_model(self, distfam, params, shape, transform, testval=None): if testval is not None: testval = pm.floatX(testval) with pm.Model() as m: - distfam('x', shape=shape, transform=transform, testval=testval, **params) + distfam("x", shape=shape, transform=transform, testval=testval, **params) return m def check_transform_elementwise_logp(self, model): @@ -234,7 +251,7 @@ def check_transform_elementwise_logp(self, model): def check_vectortransform_elementwise_logp(self, model, vect_opt=0): x0 = model.deterministics[0] x = model.free_RVs[0] - assert (x.ndim-1) == x.logp_elemwiset.ndim + assert (x.ndim - 1) == x.logp_elemwiset.ndim pt = model.test_point array = np.random.randn(*pt[x.name].shape) @@ -252,145 +269,186 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): close_to(x.logp_elemwise(pt), elementwiselogp.eval(), tol) - @pytest.mark.parametrize('sd,shape', [ - (2.5, 2), - (5., (2, 3)), - (np.ones(3)*10., (4, 3)), - ]) + @pytest.mark.parametrize( + "sd,shape", [(2.5, 2), (5.0, (2, 3)), (np.ones(3) * 10.0, (4, 3))] + ) def test_half_normal(self, sd, shape): - model = self.build_model(pm.HalfNormal, {'sd': sd}, shape=shape, transform=tr.log) + model = self.build_model( + pm.HalfNormal, {"sd": sd}, shape=shape, transform=tr.log + ) self.check_transform_elementwise_logp(model) - @pytest.mark.parametrize('lam,shape', [ - (2.5, 2), - (5., (2, 3)), - (np.ones(3), (4, 3)) - ]) + @pytest.mark.parametrize( + "lam,shape", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))] + ) def test_exponential(self, lam, shape): - model = self.build_model(pm.Exponential, {'lam': lam}, shape=shape, transform=tr.log) + model = self.build_model( + pm.Exponential, {"lam": lam}, shape=shape, transform=tr.log + ) self.check_transform_elementwise_logp(model) - @pytest.mark.parametrize('a,b,shape', [ - (1., 1., 2), - (.5, .5, (2, 3)), - (np.ones(3), np.ones(3), (4, 3)), - ]) + @pytest.mark.parametrize( + "a,b,shape", + [(1.0, 1.0, 2), (0.5, 0.5, (2, 3)), (np.ones(3), np.ones(3), (4, 3))], + ) def test_beta(self, a, b, shape): - model = self.build_model(pm.Beta, {'alpha': a, 'beta': b}, shape=shape, transform=tr.logodds) + model = self.build_model( + pm.Beta, {"alpha": a, "beta": b}, shape=shape, transform=tr.logodds + ) self.check_transform_elementwise_logp(model) - @pytest.mark.parametrize('lower,upper,shape', [ - (0., 1., 2), - (.5, 5.5, (2, 3)), - (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)) - ]) + @pytest.mark.parametrize( + "lower,upper,shape", + [ + (0.0, 1.0, 2), + (0.5, 5.5, (2, 3)), + (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)), + ], + ) def test_uniform(self, lower, upper, shape): interval = tr.Interval(lower, upper) - model = self.build_model(pm.Uniform, {'lower': lower, 'upper': upper}, - shape=shape, transform=interval) + model = self.build_model( + pm.Uniform, + {"lower": lower, "upper": upper}, + shape=shape, + transform=interval, + ) self.check_transform_elementwise_logp(model) - @pytest.mark.parametrize('mu,kappa,shape', [ - (0., 1., 2), - (-.5, 5.5, (2, 3)), - (np.zeros(3), np.ones(3), (4, 3)) - ]) + @pytest.mark.parametrize( + "mu,kappa,shape", + [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))], + ) def test_vonmises(self, mu, kappa, shape): - model = self.build_model(pm.VonMises, {'mu': mu, 'kappa': kappa}, shape=shape, transform=tr.circular) + model = self.build_model( + pm.VonMises, {"mu": mu, "kappa": kappa}, shape=shape, transform=tr.circular + ) self.check_transform_elementwise_logp(model) - @pytest.mark.parametrize('a,shape', [ - (np.ones(2), 2), - (np.ones((2, 3))*.5, (2, 3)), - (np.ones(3), (4, 3)) - ]) + @pytest.mark.parametrize( + "a,shape", + [(np.ones(2), 2), (np.ones((2, 3)) * 0.5, (2, 3)), (np.ones(3), (4, 3))], + ) def test_dirichlet(self, a, shape): - model = self.build_model(pm.Dirichlet, {'a': a}, shape=shape, transform=tr.stick_breaking) + model = self.build_model( + pm.Dirichlet, {"a": a}, shape=shape, transform=tr.stick_breaking + ) self.check_vectortransform_elementwise_logp(model, vect_opt=1) def test_normal_ordered(self): - model = self.build_model(pm.Normal, {'mu': 0., 'sd': 1.}, shape=3, - testval=np.asarray([-1., 1., 4.]), - transform=tr.ordered) + model = self.build_model( + pm.Normal, + {"mu": 0.0, "sd": 1.0}, + shape=3, + testval=np.asarray([-1.0, 1.0, 4.0]), + transform=tr.ordered, + ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) - @pytest.mark.parametrize('sd,shape', [ - (2.5, (2,)), - (np.ones(3), (4, 3)), - ]) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.parametrize("sd,shape", [(2.5, (2,)), (np.ones(3), (4, 3))]) + @pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), reason="Fails on float32" + ) def test_half_normal_ordered(self, sd, shape): testval = np.sort(np.abs(np.random.randn(*shape))) - model = self.build_model(pm.HalfNormal, {'sd': sd}, shape=shape, - testval=testval, - transform=tr.Chain([tr.log, tr.ordered])) + model = self.build_model( + pm.HalfNormal, + {"sd": sd}, + shape=shape, + testval=testval, + transform=tr.Chain([tr.log, tr.ordered]), + ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) - @pytest.mark.parametrize('lam,shape', [ - (2.5, (2,)), - (np.ones(3), (4, 3)) - ]) + @pytest.mark.parametrize("lam,shape", [(2.5, (2,)), (np.ones(3), (4, 3))]) def test_exponential_ordered(self, lam, shape): testval = np.sort(np.abs(np.random.randn(*shape))) - model = self.build_model(pm.Exponential, {'lam': lam}, shape=shape, - testval=testval, - transform=tr.Chain([tr.log, tr.ordered])) + model = self.build_model( + pm.Exponential, + {"lam": lam}, + shape=shape, + testval=testval, + transform=tr.Chain([tr.log, tr.ordered]), + ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) - @pytest.mark.parametrize('a,b,shape', [ - (1., 1., (2,)), - (np.ones(3), np.ones(3), (4, 3)), - ]) + @pytest.mark.parametrize( + "a,b,shape", [(1.0, 1.0, (2,)), (np.ones(3), np.ones(3), (4, 3))] + ) def test_beta_ordered(self, a, b, shape): testval = np.sort(np.abs(np.random.rand(*shape))) - model = self.build_model(pm.Beta, {'alpha': a, 'beta': b}, shape=shape, - testval=testval, - transform=tr.Chain([tr.logodds, tr.ordered])) + model = self.build_model( + pm.Beta, + {"alpha": a, "beta": b}, + shape=shape, + testval=testval, + transform=tr.Chain([tr.logodds, tr.ordered]), + ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) - @pytest.mark.parametrize('lower,upper,shape', [ - (0., 1., (2,)), - (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)) - ]) + @pytest.mark.parametrize( + "lower,upper,shape", + [(0.0, 1.0, (2,)), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))], + ) def test_uniform_ordered(self, lower, upper, shape): interval = tr.Interval(lower, upper) testval = np.sort(np.abs(np.random.rand(*shape))) - model = self.build_model(pm.Uniform, {'lower': lower, 'upper': upper}, shape=shape, - testval=testval, - transform=tr.Chain([interval, tr.ordered])) + model = self.build_model( + pm.Uniform, + {"lower": lower, "upper": upper}, + shape=shape, + testval=testval, + transform=tr.Chain([interval, tr.ordered]), + ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) - @pytest.mark.parametrize('mu,kappa,shape', [ - (0., 1., (2,)), - (np.zeros(3), np.ones(3), (4, 3)) - ]) + @pytest.mark.parametrize( + "mu,kappa,shape", [(0.0, 1.0, (2,)), (np.zeros(3), np.ones(3), (4, 3))] + ) def test_vonmises_ordered(self, mu, kappa, shape): testval = np.sort(np.abs(np.random.rand(*shape))) - model = self.build_model(pm.VonMises, {'mu': mu, 'kappa': kappa}, shape=shape, - testval=testval, - transform=tr.Chain([tr.circular, tr.ordered])) + model = self.build_model( + pm.VonMises, + {"mu": mu, "kappa": kappa}, + shape=shape, + testval=testval, + transform=tr.Chain([tr.circular, tr.ordered]), + ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) - @pytest.mark.parametrize('lower,upper,shape,transform', [ - (0., 1., (2,), tr.stick_breaking), - (.5, 5.5, (2, 3), tr.stick_breaking), - (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds])) - ]) + @pytest.mark.parametrize( + "lower,upper,shape,transform", + [ + (0.0, 1.0, (2,), tr.stick_breaking), + (0.5, 5.5, (2, 3), tr.stick_breaking), + (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds])), + ], + ) def test_uniform_other(self, lower, upper, shape, transform): - testval = np.ones(shape)/shape[-1] - model = self.build_model(pm.Uniform, {'lower': lower, 'upper': upper}, - shape=shape, - testval=testval, - transform=transform) + testval = np.ones(shape) / shape[-1] + model = self.build_model( + pm.Uniform, + {"lower": lower, "upper": upper}, + shape=shape, + testval=testval, + transform=transform, + ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) - @pytest.mark.parametrize('mu,cov,shape', [ - (np.zeros(2), np.diag(np.ones(2)), (2,)), - (np.zeros(3), np.diag(np.ones(3)), (4, 3)), - ]) + @pytest.mark.parametrize( + "mu,cov,shape", + [ + (np.zeros(2), np.diag(np.ones(2)), (2,)), + (np.zeros(3), np.diag(np.ones(3)), (4, 3)), + ], + ) def test_mvnormal_ordered(self, mu, cov, shape): testval = np.sort(np.random.randn(*shape)) - model = self.build_model(pm.MvNormal, {'mu': mu, 'cov': cov}, shape=shape, - testval=testval, - transform=tr.ordered) + model = self.build_model( + pm.MvNormal, + {"mu": mu, "cov": cov}, + shape=shape, + testval=testval, + transform=tr.ordered, + ) self.check_vectortransform_elementwise_logp(model, vect_opt=1) diff --git a/pymc3/tests/test_tuning.py b/pymc3/tests/test_tuning.py index 9da803d693..9f6769fc25 100644 --- a/pymc3/tests/test_tuning.py +++ b/pymc3/tests/test_tuning.py @@ -5,7 +5,7 @@ def test_adjust_precision(): - a = np.array([-10, -.01, 0, 10, 1e300, -inf, inf]) + a = np.array([-10, -0.01, 0, 10, 1e300, -inf, inf]) a1 = scaling.adjust_precision(a) assert all((a1 > 0) & (a1 < 1e200)) @@ -24,7 +24,7 @@ def test_mle_jacobian(): with model: map_estimate = find_MAP(method="BFGS", model=model) - rtol = 1E-5 # this rtol should work on both floatX precisions + rtol = 1e-5 # this rtol should work on both floatX precisions np.testing.assert_allclose(map_estimate["mu_i"], truth, rtol=rtol) start, model, _ = models.simple_normal(bounded_prior=True) diff --git a/pymc3/tests/test_types.py b/pymc3/tests/test_types.py index e59fce37c8..15ae8452b7 100644 --- a/pymc3/tests/test_types.py +++ b/pymc3/tests/test_types.py @@ -22,27 +22,29 @@ def teardown_method(self): # restore theano config theano.config = self.theano_config - @change_flags({'floatX': 'float64', 'warn_float64': 'ignore'}) + @change_flags({"floatX": "float64", "warn_float64": "ignore"}) def test_float64(self): with Model() as model: - x = Normal('x', testval=np.array(1., dtype='float64')) - obs = Normal('obs', mu=x, sd=1., observed=np.random.randn(5)) + x = Normal("x", testval=np.array(1.0, dtype="float64")) + obs = Normal("obs", mu=x, sd=1.0, observed=np.random.randn(5)) - assert x.dtype == 'float64' - assert obs.dtype == 'float64' + assert x.dtype == "float64" + assert obs.dtype == "float64" for sampler in self.samplers: with model: sample(10, sampler()) - @change_flags({'floatX': 'float32', 'warn_float64': 'warn'}) + @change_flags({"floatX": "float32", "warn_float64": "warn"}) def test_float32(self): with Model() as model: - x = Normal('x', testval=np.array(1., dtype='float32')) - obs = Normal('obs', mu=x, sd=1., observed=np.random.randn(5).astype('float32')) + x = Normal("x", testval=np.array(1.0, dtype="float32")) + obs = Normal( + "obs", mu=x, sd=1.0, observed=np.random.randn(5).astype("float32") + ) - assert x.dtype == 'float32' - assert obs.dtype == 'float32' + assert x.dtype == "float32" + assert obs.dtype == "float32" for sampler in self.samplers: with model: diff --git a/pymc3/tests/test_updates.py b/pymc3/tests/test_updates.py index 6b0e641240..78df9270c0 100644 --- a/pymc3/tests/test_updates.py +++ b/pymc3/tests/test_updates.py @@ -11,50 +11,63 @@ adadelta, adam, adamax, - adagrad_window + adagrad_window, ) -_a = theano.shared(1.) -_b = _a*2 +_a = theano.shared(1.0) +_b = _a * 2 -_m = theano.shared(np.empty((10, ), theano.config.floatX)) +_m = theano.shared(np.empty((10,), theano.config.floatX)) _n = _m.sum() _m2 = theano.shared(np.empty((10, 10, 10), theano.config.floatX)) _n2 = _b + _n + _m2.sum() @pytest.mark.parametrize( - 'opt', - [sgd, momentum, nesterov_momentum, - adagrad, rmsprop, adadelta, adam, - adamax, adagrad_window], - ids=['sgd', 'momentum', 'nesterov_momentum', - 'adagrad', 'rmsprop', 'adadelta', 'adam', - 'adamax', 'adagrad_window'] + "opt", + [ + sgd, + momentum, + nesterov_momentum, + adagrad, + rmsprop, + adadelta, + adam, + adamax, + adagrad_window, + ], + ids=[ + "sgd", + "momentum", + "nesterov_momentum", + "adagrad", + "rmsprop", + "adadelta", + "adam", + "adamax", + "adagrad_window", + ], ) @pytest.mark.parametrize( - 'getter', - [lambda t: t, # all params -> ok - lambda t: (None, t[1]), # missing loss -> fail - lambda t: (t[0], None), # missing params -> fail - lambda t: (None, None)], # all missing -> partial - ids=['all_params', - 'missing_loss', - 'missing_params', - 'all_missing'] + "getter", + [ + lambda t: t, # all params -> ok + lambda t: (None, t[1]), # missing loss -> fail + lambda t: (t[0], None), # missing params -> fail + lambda t: (None, None), + ], # all missing -> partial + ids=["all_params", "missing_loss", "missing_params", "all_missing"], ) @pytest.mark.parametrize( - 'kwargs', - [dict(), dict(learning_rate=1e-2)], - ids=['without_args', 'with_args'] + "kwargs", [dict(), dict(learning_rate=1e-2)], ids=["without_args", "with_args"] ) @pytest.mark.parametrize( - 'loss_and_params', + "loss_and_params", [(_b, [_a]), (_n, [_m]), (_n2, [_a, _m, _m2])], - ids=['scalar', 'matrix', 'mixed'] + ids=["scalar", "matrix", "mixed"], ) def test_updates_fast(opt, loss_and_params, kwargs, getter): - with change_flags(compute_test_value='ignore'): + with change_flags(compute_test_value="ignore"): loss, param = getter(loss_and_params) args = dict() args.update(**kwargs) diff --git a/pymc3/tests/test_util.py b/pymc3/tests/test_util.py index 52e567cc75..15fc66ad31 100644 --- a/pymc3/tests/test_util.py +++ b/pymc3/tests/test_util.py @@ -8,11 +8,8 @@ class TestTransformName(object): - cases = [ - ('var', 'var_test__'), - ('var_test_', 'var_test__test__') - ] - transform_name = 'test' + cases = [("var", "var_test__"), ("var_test_", "var_test__test__")] + transform_name = "test" def test_get_transformed_name(self): test_transform = Transform() @@ -32,55 +29,51 @@ def test_get_untransformed_name(self): pm.util.get_untransformed_name(name) - class TestUpdateStartVals(SeededTest): def setup_method(self): super(TestUpdateStartVals, self).setup_method() - + def test_soft_update_all_present(self): - start = {'a': 1, 'b': 2} - test_point = {'a': 3, 'b': 4} + start = {"a": 1, "b": 2} + test_point = {"a": 3, "b": 4} pm.util.update_start_vals(start, test_point, model=None) - assert start == {'a': 1, 'b': 2} - + assert start == {"a": 1, "b": 2} + def test_soft_update_one_missing(self): - start = {'a': 1, } - test_point = {'a': 3, 'b': 4} + start = {"a": 1} + test_point = {"a": 3, "b": 4} pm.util.update_start_vals(start, test_point, model=None) - assert start == {'a': 1, 'b': 4} - + assert start == {"a": 1, "b": 4} + def test_soft_update_empty(self): start = {} - test_point = {'a': 3, 'b': 4} + test_point = {"a": 3, "b": 4} pm.util.update_start_vals(start, test_point, model=None) assert start == test_point - + def test_soft_update_transformed(self): with pm.Model() as model: - pm.Exponential('a', 1) - start = {'a': 2.} - test_point = {'a_log__': 0} + pm.Exponential("a", 1) + start = {"a": 2.0} + test_point = {"a_log__": 0} pm.util.update_start_vals(start, test_point, model) - assert_almost_equal(np.exp(start['a_log__']), start['a']) - + assert_almost_equal(np.exp(start["a_log__"]), start["a"]) + def test_soft_update_parent(self): with pm.Model() as model: - a = pm.Uniform('a', lower=0., upper=1.) - b = pm.Uniform('b', lower=2., upper=3.) - pm.Uniform('lower', lower=a, upper=3.) - pm.Uniform('upper', lower=0., upper=b) - pm.Uniform('interv', lower=a, upper=b) - - start = {'a': .3, 'b': 2.1, 'lower': 1.4, 'upper': 1.4, 'interv':1.4} - test_point = {'lower_interval__': -0.3746934494414109, - 'upper_interval__': 0.693147180559945, - 'interv_interval__': 0.4519851237430569} - pm.util.update_start_vals(start, model.test_point, model) - assert_almost_equal(start['lower_interval__'], - test_point['lower_interval__']) - assert_almost_equal(start['upper_interval__'], - test_point['upper_interval__']) - assert_almost_equal(start['interv_interval__'], - test_point['interv_interval__']) - + a = pm.Uniform("a", lower=0.0, upper=1.0) + b = pm.Uniform("b", lower=2.0, upper=3.0) + pm.Uniform("lower", lower=a, upper=3.0) + pm.Uniform("upper", lower=0.0, upper=b) + pm.Uniform("interv", lower=a, upper=b) + start = {"a": 0.3, "b": 2.1, "lower": 1.4, "upper": 1.4, "interv": 1.4} + test_point = { + "lower_interval__": -0.3746934494414109, + "upper_interval__": 0.693147180559945, + "interv_interval__": 0.4519851237430569, + } + pm.util.update_start_vals(start, model.test_point, model) + assert_almost_equal(start["lower_interval__"], test_point["lower_interval__"]) + assert_almost_equal(start["upper_interval__"], test_point["upper_interval__"]) + assert_almost_equal(start["interv_interval__"], test_point["interv_interval__"]) diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index 867a129579..f16e49d5d7 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -11,42 +11,34 @@ import pymc3.util from pymc3.theanof import change_flags from pymc3.variational.approximations import ( - MeanFieldGroup, FullRankGroup, - NormalizingFlowGroup, EmpiricalGroup, - MeanField, FullRank, NormalizingFlow, Empirical -) -from pymc3.variational.inference import ( - ADVI, FullRankADVI, SVGD, NFVI, ASVGD, - fit + MeanFieldGroup, + FullRankGroup, + NormalizingFlowGroup, + EmpiricalGroup, + MeanField, + FullRank, + NormalizingFlow, + Empirical, ) +from pymc3.variational.inference import ADVI, FullRankADVI, SVGD, NFVI, ASVGD, fit from pymc3.variational import flows from pymc3.variational.opvi import Approximation, Group from pymc3.variational import opvi from . import models from .helpers import not_raises -pytestmark = pytest.mark.usefixtures( - 'strict_float32', - 'seeded_test' -) +pytestmark = pytest.mark.usefixtures("strict_float32", "seeded_test") -@pytest.mark.parametrize( - 'diff', - [ - 'relative', - 'absolute' - ] -) -@pytest.mark.parametrize( - 'ord', - [1, 2, np.inf] -) +@pytest.mark.parametrize("diff", ["relative", "absolute"]) +@pytest.mark.parametrize("ord", [1, 2, np.inf]) def test_callbacks_convergence(diff, ord): - cb = pm.variational.callbacks.CheckParametersConvergence(every=1, diff=diff, ord=ord) + cb = pm.variational.callbacks.CheckParametersConvergence( + every=1, diff=diff, ord=ord + ) class _approx: - params = (theano.shared(np.asarray([1, 2, 3])), ) + params = (theano.shared(np.asarray([1, 2, 3])),) approx = _approx() @@ -57,55 +49,68 @@ class _approx: def test_tracker_callback(): import time + tracker = pm.callbacks.Tracker( - ints=lambda *t: t[-1], - ints2=lambda ap, h, j: j, - time=time.time, + ints=lambda *t: t[-1], ints2=lambda ap, h, j: j, time=time.time ) for i in range(10): tracker(None, None, i) - assert 'time' in tracker.hist - assert 'ints' in tracker.hist - assert 'ints2' in tracker.hist - assert (len(tracker['ints']) - == len(tracker['ints2']) - == len(tracker['time']) - == 10) - assert tracker['ints'] == tracker['ints2'] == list(range(10)) - tracker = pm.callbacks.Tracker( - bad=lambda t: t # bad signature - ) + assert "time" in tracker.hist + assert "ints" in tracker.hist + assert "ints2" in tracker.hist + assert len(tracker["ints"]) == len(tracker["ints2"]) == len(tracker["time"]) == 10 + assert tracker["ints"] == tracker["ints2"] == list(range(10)) + tracker = pm.callbacks.Tracker(bad=lambda t: t) # bad signature with pytest.raises(TypeError): tracker(None, None, 1) -@pytest.fixture('module') +@pytest.fixture("module") def three_var_model(): with pm.Model() as model: - pm.HalfNormal('one', shape=(10, 2), total_size=100) - pm.Normal('two', shape=(10, )) - pm.Normal('three', shape=(10, 1, 2)) + pm.HalfNormal("one", shape=(10, 2), total_size=100) + pm.Normal("two", shape=(10,)) + pm.Normal("three", shape=(10, 1, 2)) return model @pytest.mark.parametrize( - ['raises', 'grouping'], + ["raises", "grouping"], [ (not_raises(), {MeanFieldGroup: None}), - (not_raises(), {FullRankGroup: None, MeanFieldGroup: ['one']}), - (not_raises(), {MeanFieldGroup: ['one'], FullRankGroup: ['two'], NormalizingFlowGroup: ['three']}), - (pytest.raises(TypeError, match='Found duplicates'), - {MeanFieldGroup: ['one'], FullRankGroup: ['two', 'one'], NormalizingFlowGroup: ['three']}), - (pytest.raises(TypeError, match='No approximation is specified'), {MeanFieldGroup: ['one', 'two']}), - (not_raises(), {MeanFieldGroup: ['one'], FullRankGroup: ['two', 'three']}), - ] + (not_raises(), {FullRankGroup: None, MeanFieldGroup: ["one"]}), + ( + not_raises(), + { + MeanFieldGroup: ["one"], + FullRankGroup: ["two"], + NormalizingFlowGroup: ["three"], + }, + ), + ( + pytest.raises(TypeError, match="Found duplicates"), + { + MeanFieldGroup: ["one"], + FullRankGroup: ["two", "one"], + NormalizingFlowGroup: ["three"], + }, + ), + ( + pytest.raises(TypeError, match="No approximation is specified"), + {MeanFieldGroup: ["one", "two"]}, + ), + (not_raises(), {MeanFieldGroup: ["one"], FullRankGroup: ["two", "three"]}), + ], ) def test_init_groups(three_var_model, raises, grouping): with raises, three_var_model: approxes, groups = zip(*grouping.items()) - groups = [list(map(functools.partial(getattr, three_var_model), g)) - if g is not None else None - for g in groups] + groups = [ + list(map(functools.partial(getattr, three_var_model), g)) + if g is not None + else None + for g in groups + ] inited_groups = [a(group=g) for a, g in zip(approxes, groups)] approx = Approximation(inited_groups) for ig, g in zip(inited_groups, groups): @@ -117,24 +122,46 @@ def test_init_groups(three_var_model, raises, grouping): assert approx.ndim == three_var_model.ndim -@pytest.fixture(params=[ +@pytest.fixture( + params=[ ({}, {MeanFieldGroup: (None, {})}), - ({}, {FullRankGroup: (None, {}), MeanFieldGroup: (['one'], {})}), - ({}, {MeanFieldGroup: (['one'], {}), FullRankGroup: (['two'], {}), - NormalizingFlowGroup: (['three'], {'flow': 'scale-hh*2-planar-radial-loc'})}), - ({}, {MeanFieldGroup: (['one'], {}), FullRankGroup: (['two', 'three'], {})}), - ({}, {MeanFieldGroup: (['one'], {}), EmpiricalGroup: (['two', 'three'], {'size': 100})}) -], - ids=lambda t: ', '.join('%s: %s' % (k.__name__, v[0]) for k, v in t[1].items()) + ({}, {FullRankGroup: (None, {}), MeanFieldGroup: (["one"], {})}), + ( + {}, + { + MeanFieldGroup: (["one"], {}), + FullRankGroup: (["two"], {}), + NormalizingFlowGroup: ( + ["three"], + {"flow": "scale-hh*2-planar-radial-loc"}, + ), + }, + ), + ({}, {MeanFieldGroup: (["one"], {}), FullRankGroup: (["two", "three"], {})}), + ( + {}, + { + MeanFieldGroup: (["one"], {}), + EmpiricalGroup: (["two", "three"], {"size": 100}), + }, + ), + ], + ids=lambda t: ", ".join("%s: %s" % (k.__name__, v[0]) for k, v in t[1].items()), ) def three_var_groups(request, three_var_model): kw, grouping = request.param approxes, groups = zip(*grouping.items()) groups, gkwargs = zip(*groups) - groups = [list(map(functools.partial(getattr, three_var_model), g)) - if g is not None else None - for g in groups] - inited_groups = [a(group=g, model=three_var_model, **gk) for a, g, gk in zip(approxes, groups, gkwargs)] + groups = [ + list(map(functools.partial(getattr, three_var_model), g)) + if g is not None + else None + for g in groups + ] + inited_groups = [ + a(group=g, model=three_var_model, **gk) + for a, g, gk in zip(approxes, groups, gkwargs) + ] return inited_groups @@ -151,30 +178,30 @@ def three_var_approx_single_group_mf(three_var_model): def test_sample_simple(three_var_approx): trace = three_var_approx.sample(500) - assert set(trace.varnames) == {'one', 'one_log__', 'three', 'two'} + assert set(trace.varnames) == {"one", "one_log__", "three", "two"} assert len(trace) == 500 - assert trace[0]['one'].shape == (10, 2) - assert trace[0]['two'].shape == (10, ) - assert trace[0]['three'].shape == (10, 1, 2) + assert trace[0]["one"].shape == (10, 2) + assert trace[0]["two"].shape == (10,) + assert trace[0]["three"].shape == (10, 1, 2) @pytest.fixture def aevb_initial(): - return theano.shared(np.random.rand(3, 7).astype('float32')) + return theano.shared(np.random.rand(3, 7).astype("float32")) @pytest.fixture( params=[ (MeanFieldGroup, {}), (FullRankGroup, {}), - (NormalizingFlowGroup, {'flow': 'scale'}), - (NormalizingFlowGroup, {'flow': 'loc'}), - (NormalizingFlowGroup, {'flow': 'hh'}), - (NormalizingFlowGroup, {'flow': 'planar'}), - (NormalizingFlowGroup, {'flow': 'radial'}), - (NormalizingFlowGroup, {'flow': 'radial-loc'}) + (NormalizingFlowGroup, {"flow": "scale"}), + (NormalizingFlowGroup, {"flow": "loc"}), + (NormalizingFlowGroup, {"flow": "hh"}), + (NormalizingFlowGroup, {"flow": "planar"}), + (NormalizingFlowGroup, {"flow": "radial"}), + (NormalizingFlowGroup, {"flow": "radial-loc"}), ], - ids=lambda t: '{c} : {d}'.format(c=t[0].__name__, d=t[1]) + ids=lambda t: "{c} : {d}".format(c=t[0].__name__, d=t[1]), ) def parametric_grouped_approxes(request): return request.param @@ -190,10 +217,14 @@ def three_var_aevb_groups(parametric_grouped_approxes, three_var_model, aevb_ini if isinstance(k, int): params[k] = dict() for k_i, v_i in v.items(): - params[k][k_i] = aevb_initial.dot(np.random.rand(7, *v_i).astype('float32')) + params[k][k_i] = aevb_initial.dot( + np.random.rand(7, *v_i).astype("float32") + ) else: - params[k] = aevb_initial.dot(np.random.rand(7, *v).astype('float32')) - aevb_g = cls([three_var_model.one], params=params, model=three_var_model, local=True) + params[k] = aevb_initial.dot(np.random.rand(7, *v).astype("float32")) + aevb_g = cls( + [three_var_model.one], params=params, model=three_var_model, local=True + ) return [aevb_g, MeanFieldGroup(None, model=three_var_model)] @@ -204,35 +235,35 @@ def three_var_aevb_approx(three_var_model, three_var_aevb_groups): def test_sample_aevb(three_var_aevb_approx, aevb_initial): - pm.KLqp(three_var_aevb_approx).fit(1, more_replacements={ - aevb_initial: np.zeros_like(aevb_initial.get_value())[:1] - }) - aevb_initial.set_value(np.random.rand(7, 7).astype('float32')) + pm.KLqp(three_var_aevb_approx).fit( + 1, more_replacements={aevb_initial: np.zeros_like(aevb_initial.get_value())[:1]} + ) + aevb_initial.set_value(np.random.rand(7, 7).astype("float32")) trace = three_var_aevb_approx.sample(500) - assert set(trace.varnames) == {'one', 'one_log__', 'two', 'three'} + assert set(trace.varnames) == {"one", "one_log__", "two", "three"} assert len(trace) == 500 - assert trace[0]['one'].shape == (7, 2) - assert trace[0]['two'].shape == (10, ) - assert trace[0]['three'].shape == (10, 1, 2) + assert trace[0]["one"].shape == (7, 2) + assert trace[0]["two"].shape == (10,) + assert trace[0]["three"].shape == (10, 1, 2) - aevb_initial.set_value(np.random.rand(13, 7).astype('float32')) + aevb_initial.set_value(np.random.rand(13, 7).astype("float32")) trace = three_var_aevb_approx.sample(500) - assert set(trace.varnames) == {'one', 'one_log__', 'two', 'three'} + assert set(trace.varnames) == {"one", "one_log__", "two", "three"} assert len(trace) == 500 - assert trace[0]['one'].shape == (13, 2) - assert trace[0]['two'].shape == (10,) - assert trace[0]['three'].shape == (10, 1, 2) + assert trace[0]["one"].shape == (13, 2) + assert trace[0]["two"].shape == (10,) + assert trace[0]["three"].shape == (10, 1, 2) def test_replacements_in_sample_node_aevb(three_var_aevb_approx, aevb_initial): - inp = tt.matrix(dtype='float32') + inp = tt.matrix(dtype="float32") three_var_aevb_approx.sample_node( - three_var_aevb_approx.model.one, 2, - more_replacements={aevb_initial: inp}).eval({inp: np.random.rand(7, 7).astype('float32')}) + three_var_aevb_approx.model.one, 2, more_replacements={aevb_initial: inp} + ).eval({inp: np.random.rand(7, 7).astype("float32")}) three_var_aevb_approx.sample_node( - three_var_aevb_approx.model.one, None, - more_replacements={aevb_initial: inp}).eval({inp: np.random.rand(7, 7).astype('float32')}) + three_var_aevb_approx.model.one, None, more_replacements={aevb_initial: inp} + ).eval({inp: np.random.rand(7, 7).astype("float32")}) def test_vae(): @@ -242,24 +273,28 @@ def test_vae(): x_inp = tt.vector() x_inp.tag.test_value = data[:minibatch_size] - ae = theano.shared(pm.floatX([.1, .1])) - be = theano.shared(pm.floatX(1.)) + ae = theano.shared(pm.floatX([0.1, 0.1])) + be = theano.shared(pm.floatX(1.0)) - ad = theano.shared(pm.floatX(1.)) - bd = theano.shared(pm.floatX(1.)) + ad = theano.shared(pm.floatX(1.0)) + bd = theano.shared(pm.floatX(1.0)) - enc = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be - mu, rho = enc[:, 0], enc[:, 1] + enc = x_inp.dimshuffle(0, "x") * ae.dimshuffle("x", 0) + be + mu, rho = enc[:, 0], enc[:, 1] with pm.Model(): # Hidden variables - zs = pm.Normal('zs', mu=0, sd=1, shape=minibatch_size) + zs = pm.Normal("zs", mu=0, sd=1, shape=minibatch_size) dec = zs * ad + bd # Observation model - pm.Normal('xs_', mu=dec, sd=0.1, observed=x_inp) + pm.Normal("xs_", mu=dec, sd=0.1, observed=x_inp) - pm.fit(1, local_rv={zs: dict(mu=mu, rho=rho)}, - more_replacements={x_inp: x_mini}, more_obj_params=[ae, be, ad, bd]) + pm.fit( + 1, + local_rv={zs: dict(mu=mu, rho=rho)}, + more_replacements={x_inp: x_mini}, + more_obj_params=[ae, be, ad, bd], + ) def test_logq_mini_1_sample_1_var(parametric_grouped_approxes, three_var_model): @@ -272,7 +307,9 @@ def test_logq_mini_1_sample_1_var(parametric_grouped_approxes, three_var_model): def test_logq_mini_2_sample_2_var(parametric_grouped_approxes, three_var_model): cls, kw = parametric_grouped_approxes - approx = cls([three_var_model.one, three_var_model.two], model=three_var_model, **kw) + approx = cls( + [three_var_model.one, three_var_model.two], model=three_var_model, **kw + ) logq = approx.logq logq = approx.set_size_and_deterministic(logq, 2, 0) logq.eval() @@ -280,7 +317,9 @@ def test_logq_mini_2_sample_2_var(parametric_grouped_approxes, three_var_model): def test_logq_mini_sample_aevb(three_var_aevb_groups): approx = three_var_aevb_groups[0] - logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 3, 0) + logq, symbolic_logq = approx.set_size_and_deterministic( + [approx.logq, approx.symbolic_logq], 3, 0 + ) e = logq.eval() es = symbolic_logq.eval() assert e.shape == () @@ -289,13 +328,17 @@ def test_logq_mini_sample_aevb(three_var_aevb_groups): def test_logq_aevb(three_var_aevb_approx): approx = three_var_aevb_approx - logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 1, 0) + logq, symbolic_logq = approx.set_size_and_deterministic( + [approx.logq, approx.symbolic_logq], 1, 0 + ) e = logq.eval() es = symbolic_logq.eval() assert e.shape == () assert es.shape == (1,) - logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 2, 0) + logq, symbolic_logq = approx.set_size_and_deterministic( + [approx.logq, approx.symbolic_logq], 2, 0 + ) e = logq.eval() es = symbolic_logq.eval() assert e.shape == () @@ -304,15 +347,19 @@ def test_logq_aevb(three_var_aevb_approx): def test_logq_globals(three_var_approx): if not three_var_approx.has_logq: - pytest.skip('%s does not implement logq' % three_var_approx) + pytest.skip("%s does not implement logq" % three_var_approx) approx = three_var_approx - logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 1, 0) + logq, symbolic_logq = approx.set_size_and_deterministic( + [approx.logq, approx.symbolic_logq], 1, 0 + ) e = logq.eval() es = symbolic_logq.eval() assert e.shape == () assert es.shape == (1,) - logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 2, 0) + logq, symbolic_logq = approx.set_size_and_deterministic( + [approx.logq, approx.symbolic_logq], 2, 0 + ) e = logq.eval() es = symbolic_logq.eval() assert e.shape == () @@ -320,79 +367,118 @@ def test_logq_globals(three_var_approx): @pytest.mark.parametrize( - 'raises, vfam, type_, kw', + "raises, vfam, type_, kw", [ - (not_raises(), 'mean_field', MeanFieldGroup, {}), - (not_raises(), 'mf', MeanFieldGroup, {}), - (not_raises(), 'full_rank', FullRankGroup, {}), - (not_raises(), 'fr', FullRankGroup, {}), - (not_raises(), 'FR', FullRankGroup, {}), - (not_raises(), 'loc', NormalizingFlowGroup, {}), - (not_raises(), 'scale', NormalizingFlowGroup, {}), - (not_raises(), 'hh', NormalizingFlowGroup, {}), - (not_raises(), 'planar', NormalizingFlowGroup, {}), - (not_raises(), 'radial', NormalizingFlowGroup, {}), - (not_raises(), 'scale-loc', NormalizingFlowGroup, {}), - (pytest.raises(ValueError, match='Need `trace` or `size`'), 'empirical', EmpiricalGroup, {}), - (not_raises(), 'empirical', EmpiricalGroup, {'size': 100}), - ] + (not_raises(), "mean_field", MeanFieldGroup, {}), + (not_raises(), "mf", MeanFieldGroup, {}), + (not_raises(), "full_rank", FullRankGroup, {}), + (not_raises(), "fr", FullRankGroup, {}), + (not_raises(), "FR", FullRankGroup, {}), + (not_raises(), "loc", NormalizingFlowGroup, {}), + (not_raises(), "scale", NormalizingFlowGroup, {}), + (not_raises(), "hh", NormalizingFlowGroup, {}), + (not_raises(), "planar", NormalizingFlowGroup, {}), + (not_raises(), "radial", NormalizingFlowGroup, {}), + (not_raises(), "scale-loc", NormalizingFlowGroup, {}), + ( + pytest.raises(ValueError, match="Need `trace` or `size`"), + "empirical", + EmpiricalGroup, + {}, + ), + (not_raises(), "empirical", EmpiricalGroup, {"size": 100}), + ], ) def test_group_api_vfam(three_var_model, raises, vfam, type_, kw): with three_var_model, raises: g = Group([three_var_model.one], vfam, **kw) assert isinstance(g, type_) - assert not hasattr(g, '_kwargs') + assert not hasattr(g, "_kwargs") if isinstance(g, NormalizingFlowGroup): assert isinstance(g.flow, pm.flows.AbstractFlow) assert g.flow.formula == vfam @pytest.mark.parametrize( - 'raises, params, type_, kw, formula', + "raises, params, type_, kw, formula", [ - (not_raises(), - dict(mu=np.ones((10, 2), 'float32'), rho=np.ones((10, 2), 'float32')), - MeanFieldGroup, {}, None), - - (not_raises(), - dict(mu=np.ones((10, 2), 'float32'), - L_tril=np.ones( - FullRankGroup.get_param_spec_for(d=np.prod((10, 2)))['L_tril'], - 'float32' - )), - FullRankGroup, {}, None), - - (not_raises(), - {0: dict(loc=np.ones((10, 2), 'float32'))}, - NormalizingFlowGroup, {}, 'loc'), - - (not_raises(), - {0: dict(rho=np.ones((10, 2), 'float32'))}, - NormalizingFlowGroup, {}, 'scale'), - - (not_raises(), - {0: dict(v=np.ones((10, 2), 'float32'),)}, - NormalizingFlowGroup, {}, 'hh'), - - (not_raises(), - {0: dict(u=np.ones((10, 2), 'float32'), - w=np.ones((10, 2), 'float32'), - b=1.)}, - NormalizingFlowGroup, {}, 'planar'), - - (not_raises(), - {0: dict(z_ref=np.ones((10, 2), 'float32'), - a=1., - b=1.)}, - NormalizingFlowGroup, {}, 'radial'), - - (not_raises(), - {0: dict(rho=np.ones((10, 2), 'float32')), - 1: dict(loc=np.ones((10, 2), 'float32'))}, - NormalizingFlowGroup, {}, 'scale-loc'), - - (not_raises(), dict(histogram=np.ones((20, 10, 2), 'float32')), EmpiricalGroup, {}, None), - ] + ( + not_raises(), + dict(mu=np.ones((10, 2), "float32"), rho=np.ones((10, 2), "float32")), + MeanFieldGroup, + {}, + None, + ), + ( + not_raises(), + dict( + mu=np.ones((10, 2), "float32"), + L_tril=np.ones( + FullRankGroup.get_param_spec_for(d=np.prod((10, 2)))["L_tril"], + "float32", + ), + ), + FullRankGroup, + {}, + None, + ), + ( + not_raises(), + {0: dict(loc=np.ones((10, 2), "float32"))}, + NormalizingFlowGroup, + {}, + "loc", + ), + ( + not_raises(), + {0: dict(rho=np.ones((10, 2), "float32"))}, + NormalizingFlowGroup, + {}, + "scale", + ), + ( + not_raises(), + {0: dict(v=np.ones((10, 2), "float32"))}, + NormalizingFlowGroup, + {}, + "hh", + ), + ( + not_raises(), + { + 0: dict( + u=np.ones((10, 2), "float32"), w=np.ones((10, 2), "float32"), b=1.0 + ) + }, + NormalizingFlowGroup, + {}, + "planar", + ), + ( + not_raises(), + {0: dict(z_ref=np.ones((10, 2), "float32"), a=1.0, b=1.0)}, + NormalizingFlowGroup, + {}, + "radial", + ), + ( + not_raises(), + { + 0: dict(rho=np.ones((10, 2), "float32")), + 1: dict(loc=np.ones((10, 2), "float32")), + }, + NormalizingFlowGroup, + {}, + "scale-loc", + ), + ( + not_raises(), + dict(histogram=np.ones((20, 10, 2), "float32")), + EmpiricalGroup, + {}, + None, + ), + ], ) def test_group_api_params(three_var_model, raises, params, type_, kw, formula): with three_var_model, raises: @@ -408,15 +494,15 @@ def test_group_api_params(three_var_model, raises, params, type_, kw, formula): @pytest.mark.parametrize( - 'gcls, approx, kw', + "gcls, approx, kw", [ (MeanFieldGroup, MeanField, {}), (FullRankGroup, FullRank, {}), - (EmpiricalGroup, Empirical, {'size': 100}), - (NormalizingFlowGroup, NormalizingFlow, {'flow': 'loc'}), - (NormalizingFlowGroup, NormalizingFlow, {'flow': 'scale-loc-scale'}), - (NormalizingFlowGroup, NormalizingFlow, {}) - ] + (EmpiricalGroup, Empirical, {"size": 100}), + (NormalizingFlowGroup, NormalizingFlow, {"flow": "loc"}), + (NormalizingFlowGroup, NormalizingFlow, {"flow": "scale-loc-scale"}), + (NormalizingFlowGroup, NormalizingFlow, {}), + ], ) def test_single_group_shortcuts(three_var_model, approx, kw, gcls): with three_var_model: @@ -425,7 +511,7 @@ def test_single_group_shortcuts(three_var_model, approx, kw, gcls): assert len(a.groups) == 1 assert isinstance(a.groups[0], gcls) if isinstance(a, NormalizingFlow): - assert a.flow.formula == kw.get('flow', NormalizingFlowGroup.default_flow) + assert a.flow.formula == kw.get("flow", NormalizingFlowGroup.default_flow) def test_elbo(): @@ -437,37 +523,39 @@ def test_elbo(): post_sd = np.array([1], dtype=theano.config.floatX) # Create a model for test with pm.Model() as model: - mu = pm.Normal('mu', mu=mu0, sd=sigma) - pm.Normal('y', mu=mu, sd=1, observed=y_obs) + mu = pm.Normal("mu", mu=mu0, sd=sigma) + pm.Normal("y", mu=mu, sd=1, observed=y_obs) # Create variational gradient tensor mean_field = MeanField(model=model) - with pm.theanof.change_flags(compute_test_value='off'): + with pm.theanof.change_flags(compute_test_value="off"): elbo = -pm.operators.KL(mean_field)()(10000) - mean_field.shared_params['mu'].set_value(post_mu) - mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1)) + mean_field.shared_params["mu"].set_value(post_mu) + mean_field.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1)) f = theano.function([], elbo) elbo_mc = f() # Exact value - elbo_true = (-0.5 * ( - 3 + 3 * post_mu ** 2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu + - y_obs[0] ** 2 + y_obs[1] ** 2 + mu0 ** 2 + 3 * np.log(2 * np.pi)) + - 0.5 * (np.log(2 * np.pi) + 1)) + elbo_true = -0.5 * ( + 3 + + 3 * post_mu ** 2 + - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu + + y_obs[0] ** 2 + + y_obs[1] ** 2 + + mu0 ** 2 + + 3 * np.log(2 * np.pi) + ) + 0.5 * (np.log(2 * np.pi) + 1) np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1) -@pytest.mark.parametrize( - 'aux_total_size', - range(2, 10, 3) -) +@pytest.mark.parametrize("aux_total_size", range(2, 10, 3)) def test_scale_cost_to_minibatch_works(aux_total_size): mu0 = 1.5 sigma = 1.0 y_obs = np.array([1.6, 1.4]) - beta = len(y_obs)/float(aux_total_size) + beta = len(y_obs) / float(aux_total_size) post_mu = np.array([1.88], dtype=theano.config.floatX) post_sd = np.array([1], dtype=theano.config.floatX) @@ -475,215 +563,204 @@ def test_scale_cost_to_minibatch_works(aux_total_size): # with pm.Model(theano_config=dict(floatX='float64')): # did not not work as expected # there were some numeric problems, so float64 is forced - with pm.theanof.change_flags(floatX='float64', warn_float64='ignore'): + with pm.theanof.change_flags(floatX="float64", warn_float64="ignore"): with pm.Model(): - assert theano.config.floatX == 'float64' - assert theano.config.warn_float64 == 'ignore' - mu = pm.Normal('mu', mu=mu0, sd=sigma) - pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size) + assert theano.config.floatX == "float64" + assert theano.config.warn_float64 == "ignore" + mu = pm.Normal("mu", mu=mu0, sd=sigma) + pm.Normal("y", mu=mu, sd=1, observed=y_obs, total_size=aux_total_size) # Create variational gradient tensor mean_field_1 = MeanField() assert mean_field_1.scale_cost_to_minibatch - mean_field_1.shared_params['mu'].set_value(post_mu) - mean_field_1.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1)) + mean_field_1.shared_params["mu"].set_value(post_mu) + mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1)) - with pm.theanof.change_flags(compute_test_value='off'): + with pm.theanof.change_flags(compute_test_value="off"): elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000) with pm.Model(): - mu = pm.Normal('mu', mu=mu0, sd=sigma) - pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size) + mu = pm.Normal("mu", mu=mu0, sd=sigma) + pm.Normal("y", mu=mu, sd=1, observed=y_obs, total_size=aux_total_size) # Create variational gradient tensor mean_field_2 = MeanField() assert mean_field_1.scale_cost_to_minibatch mean_field_2.scale_cost_to_minibatch = False assert not mean_field_2.scale_cost_to_minibatch - mean_field_2.shared_params['mu'].set_value(post_mu) - mean_field_2.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1)) + mean_field_2.shared_params["mu"].set_value(post_mu) + mean_field_2.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1)) - with pm.theanof.change_flags(compute_test_value='off'): + with pm.theanof.change_flags(compute_test_value="off"): elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000) - np.testing.assert_allclose(elbo_via_total_size_unscaled.eval(), - elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta), rtol=0.02, atol=1e-1) + np.testing.assert_allclose( + elbo_via_total_size_unscaled.eval(), + elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta), + rtol=0.02, + atol=1e-1, + ) -@pytest.mark.parametrize( - 'aux_total_size', - range(2, 10, 3) -) +@pytest.mark.parametrize("aux_total_size", range(2, 10, 3)) def test_elbo_beta_kl(aux_total_size): mu0 = 1.5 sigma = 1.0 y_obs = np.array([1.6, 1.4]) - beta = len(y_obs)/float(aux_total_size) + beta = len(y_obs) / float(aux_total_size) post_mu = np.array([1.88], dtype=theano.config.floatX) post_sd = np.array([1], dtype=theano.config.floatX) - with pm.theanof.change_flags(floatX='float64', warn_float64='ignore'): + with pm.theanof.change_flags(floatX="float64", warn_float64="ignore"): with pm.Model(): - mu = pm.Normal('mu', mu=mu0, sd=sigma) - pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size) + mu = pm.Normal("mu", mu=mu0, sd=sigma) + pm.Normal("y", mu=mu, sd=1, observed=y_obs, total_size=aux_total_size) # Create variational gradient tensor mean_field_1 = MeanField() mean_field_1.scale_cost_to_minibatch = True - mean_field_1.shared_params['mu'].set_value(post_mu) - mean_field_1.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1)) + mean_field_1.shared_params["mu"].set_value(post_mu) + mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1)) - with pm.theanof.change_flags(compute_test_value='off'): + with pm.theanof.change_flags(compute_test_value="off"): elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000) with pm.Model(): - mu = pm.Normal('mu', mu=mu0, sd=sigma) - pm.Normal('y', mu=mu, sd=1, observed=y_obs) + mu = pm.Normal("mu", mu=mu0, sd=sigma) + pm.Normal("y", mu=mu, sd=1, observed=y_obs) # Create variational gradient tensor mean_field_3 = MeanField() - mean_field_3.shared_params['mu'].set_value(post_mu) - mean_field_3.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1)) + mean_field_3.shared_params["mu"].set_value(post_mu) + mean_field_3.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1)) - with pm.theanof.change_flags(compute_test_value='off'): + with pm.theanof.change_flags(compute_test_value="off"): elbo_via_beta_kl = -pm.operators.KL(mean_field_3, beta=beta)()(10000) - np.testing.assert_allclose(elbo_via_total_size_scaled.eval(), elbo_via_beta_kl.eval(), rtol=0, atol=1e-1) + np.testing.assert_allclose( + elbo_via_total_size_scaled.eval(), + elbo_via_beta_kl.eval(), + rtol=0, + atol=1e-1, + ) -@pytest.fixture( - 'module', - params=[True, False], - ids=['mini', 'full'] -) +@pytest.fixture("module", params=[True, False], ids=["mini", "full"]) def use_minibatch(request): return request.param -@pytest.fixture('module') +@pytest.fixture("module") def simple_model_data(use_minibatch): n = 1000 - sd0 = 2. - mu0 = 4. - sd = 3. - mu = -5. + sd0 = 2.0 + mu0 = 4.0 + sd = 3.0 + mu = -5.0 data = sd * np.random.randn(n) + mu d = n / sd ** 2 + 1 / sd0 ** 2 mu_post = (n * np.mean(data) / sd ** 2 + mu0 / sd0 ** 2) / d if use_minibatch: data = pm.Minibatch(data) - return dict( - n=n, - data=data, - mu_post=mu_post, - d=d, - mu0=mu0, - sd0=sd0, - sd=sd, - ) + return dict(n=n, data=data, mu_post=mu_post, d=d, mu0=mu0, sd0=sd0, sd=sd) -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def simple_model(simple_model_data): with pm.Model() as model: mu_ = pm.Normal( - 'mu', mu=simple_model_data['mu0'], - sd=simple_model_data['sd0'], testval=0) - pm.Normal('x', mu=mu_, sd=simple_model_data['sd'], - observed=simple_model_data['data'], - total_size=simple_model_data['n']) + "mu", mu=simple_model_data["mu0"], sd=simple_model_data["sd0"], testval=0 + ) + pm.Normal( + "x", + mu=mu_, + sd=simple_model_data["sd"], + observed=simple_model_data["data"], + total_size=simple_model_data["n"], + ) return model -@pytest.fixture('module', params=[ - dict(cls=NFVI, init=dict(flow='scale-loc')), +@pytest.fixture( + "module", + params=[ + dict(cls=NFVI, init=dict(flow="scale-loc")), dict(cls=ADVI, init=dict()), dict(cls=FullRankADVI, init=dict()), dict(cls=SVGD, init=dict(n_particles=500, jitter=1)), - dict(cls=ASVGD, init=dict(temperature=1.)), - ], ids=[ - 'NFVI=scale-loc', - 'ADVI', - 'FullRankADVI', - 'SVGD', - 'ASVGD' - ]) + dict(cls=ASVGD, init=dict(temperature=1.0)), + ], + ids=["NFVI=scale-loc", "ADVI", "FullRankADVI", "SVGD", "ASVGD"], +) def inference_spec(request): - cls = request.param['cls'] - init = request.param['init'] + cls = request.param["cls"] + init = request.param["init"] def init_(**kw): k = init.copy() k.update(kw) return cls(**k) + init_.cls = cls return init_ -@pytest.fixture('function') +@pytest.fixture("function") def inference(inference_spec, simple_model): with simple_model: return inference_spec() -@pytest.fixture('function') +@pytest.fixture("function") def fit_kwargs(inference, use_minibatch): _select = { - (ADVI, 'full'): dict( - obj_optimizer=pm.adagrad_window(learning_rate=0.02, n_win=50), - n=5000 + (ADVI, "full"): dict( + obj_optimizer=pm.adagrad_window(learning_rate=0.02, n_win=50), n=5000 ), - (ADVI, 'mini'): dict( - obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), - n=12000 + (ADVI, "mini"): dict( + obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), n=12000 ), - (NFVI, 'full'): dict( - obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), - n=12000 + (NFVI, "full"): dict( + obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), n=12000 ), - (NFVI, 'mini'): dict( - obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), - n=12000 + (NFVI, "mini"): dict( + obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), n=12000 ), - (FullRankADVI, 'full'): dict( - obj_optimizer=pm.adagrad_window(learning_rate=0.007, n_win=50), - n=6000 + (FullRankADVI, "full"): dict( + obj_optimizer=pm.adagrad_window(learning_rate=0.007, n_win=50), n=6000 ), - (FullRankADVI, 'mini'): dict( - obj_optimizer=pm.adagrad_window(learning_rate=0.007, n_win=50), - n=12000 + (FullRankADVI, "mini"): dict( + obj_optimizer=pm.adagrad_window(learning_rate=0.007, n_win=50), n=12000 ), - (SVGD, 'full'): dict( - obj_optimizer=pm.adagrad_window(learning_rate=0.075, n_win=7), - n=300 + (SVGD, "full"): dict( + obj_optimizer=pm.adagrad_window(learning_rate=0.075, n_win=7), n=300 ), - (SVGD, 'mini'): dict( - obj_optimizer=pm.adagrad_window(learning_rate=0.075, n_win=7), - n=300 + (SVGD, "mini"): dict( + obj_optimizer=pm.adagrad_window(learning_rate=0.075, n_win=7), n=300 ), - (ASVGD, 'full'): dict( + (ASVGD, "full"): dict( obj_optimizer=pm.adagrad_window(learning_rate=0.07, n_win=10), - n=500, obj_n_mc=300 + n=500, + obj_n_mc=300, ), - (ASVGD, 'mini'): dict( + (ASVGD, "mini"): dict( obj_optimizer=pm.adagrad_window(learning_rate=0.07, n_win=10), - n=500, obj_n_mc=300 - ) + n=500, + obj_n_mc=300, + ), } if use_minibatch: - key = 'mini' + key = "mini" # backward compat for PR#3071 inference.approx.scale_cost_to_minibatch = False else: - key = 'full' + key = "full" return _select[(type(inference), key)] -@pytest.mark.run('first') -def test_fit_oo(inference, - fit_kwargs, - simple_model_data): +@pytest.mark.run("first") +def test_fit_oo(inference, fit_kwargs, simple_model_data): trace = inference.fit(**fit_kwargs).sample(10000) - mu_post = simple_model_data['mu_post'] - d = simple_model_data['d'] - np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.05) - np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.1) + mu_post = simple_model_data["mu_post"] + d = simple_model_data["d"] + np.testing.assert_allclose(np.mean(trace["mu"]), mu_post, rtol=0.05) + np.testing.assert_allclose(np.std(trace["mu"]), np.sqrt(1.0 / d), rtol=0.1) def test_profile(inference): @@ -692,20 +769,21 @@ def test_profile(inference): def test_remove_scan_op(): with pm.Model(): - pm.Normal('n', 0, 1) + pm.Normal("n", 0, 1) inference = ADVI() buff = six.StringIO() inference.run_profiling(n=10).summary(buff) - assert 'theano.scan_module.scan_op.Scan' not in buff.getvalue() + assert "theano.scan_module.scan_op.Scan" not in buff.getvalue() buff.close() def test_clear_cache(): import pickle + pymc3.memoize.clear_cache() assert all(len(c) == 0 for c in pymc3.memoize.CACHE_REGISTRY) with pm.Model(): - pm.Normal('n', 0, 1) + pm.Normal("n", 0, 1) inference = ADVI() inference.fit(n=10) assert any(len(c) != 0 for c in inference.approx._cache.values()) @@ -713,7 +791,7 @@ def test_clear_cache(): # should not be cleared at this call assert all(len(c) == 0 for c in inference.approx._cache.values()) new_a = pickle.loads(pickle.dumps(inference.approx)) - assert not hasattr(new_a, '_cache') + assert not hasattr(new_a, "_cache") inference_new = pm.KLqp(new_a) inference_new.fit(n=10) assert any(len(c) != 0 for c in inference_new.approx._cache.values()) @@ -721,46 +799,43 @@ def test_clear_cache(): assert all(len(c) == 0 for c in inference_new.approx._cache.values()) -@pytest.fixture('module') +@pytest.fixture("module") def another_simple_model(): _model = models.simple_model()[1] with _model: - pm.Potential('pot', tt.ones((10, 10))) + pm.Potential("pot", tt.ones((10, 10))) return _model -@pytest.fixture(params=[ - dict(name='advi', kw=dict(start={})), - dict(name='fullrank_advi', kw=dict(start={})), - dict(name='svgd', kw=dict(start={}))], - ids=lambda d: d['name'] +@pytest.fixture( + params=[ + dict(name="advi", kw=dict(start={})), + dict(name="fullrank_advi", kw=dict(start={})), + dict(name="svgd", kw=dict(start={})), + ], + ids=lambda d: d["name"], ) def fit_method_with_object(request, another_simple_model): - _select = dict( - advi=ADVI, - fullrank_advi=FullRankADVI, - svgd=SVGD - ) + _select = dict(advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD) with another_simple_model: - return _select[request.param['name']]( - **request.param['kw']) + return _select[request.param["name"]](**request.param["kw"]) @pytest.mark.parametrize( - ['method', 'kwargs', 'error'], + ["method", "kwargs", "error"], [ - ('undefined', dict(), KeyError), + ("undefined", dict(), KeyError), (1, dict(), TypeError), - ('advi', dict(total_grad_norm_constraint=10), None), - ('fullrank_advi', dict(), None), - ('svgd', dict(total_grad_norm_constraint=10), None), - ('svgd', dict(start={}), None), + ("advi", dict(total_grad_norm_constraint=10), None), + ("fullrank_advi", dict(), None), + ("svgd", dict(total_grad_norm_constraint=10), None), + ("svgd", dict(start={}), None), # start argument is not allowed for ASVGD - ('asvgd', dict(start={}, total_grad_norm_constraint=10), TypeError), - ('asvgd', dict(total_grad_norm_constraint=10), None), - ('nfvi', dict(start={}), None), - ('nfvi=scale-loc', dict(start={}), None), - ('nfvi=bad-formula', dict(start={}), KeyError), + ("asvgd", dict(start={}, total_grad_norm_constraint=10), TypeError), + ("asvgd", dict(total_grad_norm_constraint=10), None), + ("nfvi", dict(start={}), None), + ("nfvi=scale-loc", dict(start={}), None), + ("nfvi=bad-formula", dict(start={}), KeyError), ], ) def test_fit_fn_text(method, kwargs, error, another_simple_model): @@ -772,40 +847,38 @@ def test_fit_fn_text(method, kwargs, error, another_simple_model): fit(10, method=method, **kwargs) -@pytest.fixture('module') +@pytest.fixture("module") def aevb_model(): with pm.Model() as model: - pm.HalfNormal('x', shape=(2,), total_size=5) - pm.Normal('y', shape=(2,)) + pm.HalfNormal("x", shape=(2,), total_size=5) + pm.Normal("y", shape=(2,)) x = model.x y = model.y mu = theano.shared(x.init_value) rho = theano.shared(np.zeros_like(x.init_value)) - return { - 'model': model, - 'y': y, - 'x': x, - 'replace': dict(mu=mu, rho=rho) - } + return {"model": model, "y": y, "x": x, "replace": dict(mu=mu, rho=rho)} def test_aevb(inference_spec, aevb_model): # add to inference that supports aevb - x = aevb_model['x'] - y = aevb_model['y'] - model = aevb_model['model'] - replace = aevb_model['replace'] + x = aevb_model["x"] + y = aevb_model["y"] + model = aevb_model["model"] + replace = aevb_model["replace"] with model: try: - inference = inference_spec(local_rv={x: {'mu': replace['mu']*5, 'rho': replace['rho']}}) - approx = inference.fit(3, obj_n_mc=2, more_obj_params=list(replace.values())) + inference = inference_spec( + local_rv={x: {"mu": replace["mu"] * 5, "rho": replace["rho"]}} + ) + approx = inference.fit( + 3, obj_n_mc=2, more_obj_params=list(replace.values()) + ) approx.sample(10) approx.sample_node( - y, - more_replacements={x: np.asarray([1, 1], dtype=x.dtype)} + y, more_replacements={x: np.asarray([1, 1], dtype=x.dtype)} ).eval() except pm.opvi.AEVBInferenceError: - pytest.skip('Does not support AEVB') + pytest.skip("Does not support AEVB") def test_rowwise_approx(three_var_model, parametric_grouped_approxes): @@ -813,19 +886,20 @@ def test_rowwise_approx(three_var_model, parametric_grouped_approxes): cls, kw = parametric_grouped_approxes with three_var_model: try: - approx = Approximation([cls([three_var_model.one], rowwise=True, **kw), Group(None, vfam='mf')]) + approx = Approximation( + [cls([three_var_model.one], rowwise=True, **kw), Group(None, vfam="mf")] + ) inference = pm.KLqp(approx) approx = inference.fit(3, obj_n_mc=2) approx.sample(10) - approx.sample_node( - three_var_model.one - ).eval() + approx.sample_node(three_var_model.one).eval() except pm.opvi.BatchedGroupError: - pytest.skip('Does not support rowwise grouping') + pytest.skip("Does not support rowwise grouping") def test_pickle_approx(three_var_approx): import pickle + dump = pickle.dumps(three_var_approx) new = pickle.loads(dump) assert new.sample(1) @@ -833,6 +907,7 @@ def test_pickle_approx(three_var_approx): def test_pickle_single_group(three_var_approx_single_group_mf): import pickle + dump = pickle.dumps(three_var_approx_single_group_mf) new = pickle.loads(dump) assert new.sample(1) @@ -840,28 +915,29 @@ def test_pickle_single_group(three_var_approx_single_group_mf): def test_pickle_approx_aevb(three_var_aevb_approx): import pickle + dump = pickle.dumps(three_var_aevb_approx) new = pickle.loads(dump) assert new.sample(1000) -@pytest.fixture('module') +@pytest.fixture("module") def binomial_model(): n_samples = 100 xs = np.random.binomial(n=1, p=0.2, size=n_samples) with pm.Model() as model: - p = pm.Beta('p', alpha=1, beta=1) - pm.Binomial('xs', n=1, p=p, observed=xs) + p = pm.Beta("p", alpha=1, beta=1) + pm.Binomial("xs", n=1, p=p, observed=xs) return model -@pytest.fixture('module') +@pytest.fixture("module") def binomial_model_inference(binomial_model, inference_spec): with binomial_model: return inference_spec() -@pytest.mark.run(after='test_sample_replacements') +@pytest.mark.run(after="test_sample_replacements") def test_replacements(binomial_model_inference): d = tt.bscalar() d.tag.test_value = 1 @@ -869,32 +945,20 @@ def test_replacements(binomial_model_inference): p = approx.model.p p_t = p ** 3 p_s = approx.sample_node(p_t) - if theano.config.compute_test_value != 'off': + if theano.config.compute_test_value != "off": assert p_s.tag.test_value.shape == p_t.tag.test_value.shape sampled = [p_s.eval() for _ in range(100)] - assert any(map( - operator.ne, - sampled[1:], sampled[:-1]) - ) # stochastic + assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic p_d = approx.sample_node(p_t, deterministic=True) sampled = [p_d.eval() for _ in range(100)] - assert all(map( - operator.eq, - sampled[1:], sampled[:-1]) - ) # deterministic + assert all(map(operator.eq, sampled[1:], sampled[:-1])) # deterministic p_r = approx.sample_node(p_t, deterministic=d) sampled = [p_r.eval({d: 1}) for _ in range(100)] - assert all(map( - operator.eq, - sampled[1:], sampled[:-1]) - ) # deterministic + assert all(map(operator.eq, sampled[1:], sampled[:-1])) # deterministic sampled = [p_r.eval({d: 0}) for _ in range(100)] - assert any(map( - operator.ne, - sampled[1:], sampled[:-1]) - ) # stochastic + assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic def test_sample_replacements(binomial_model_inference): @@ -904,21 +968,15 @@ def test_sample_replacements(binomial_model_inference): p = approx.model.p p_t = p ** 3 p_s = approx.sample_node(p_t, size=100) - if theano.config.compute_test_value != 'off': - assert p_s.tag.test_value.shape == (100, ) + p_t.tag.test_value.shape + if theano.config.compute_test_value != "off": + assert p_s.tag.test_value.shape == (100,) + p_t.tag.test_value.shape sampled = p_s.eval() - assert any(map( - operator.ne, - sampled[1:], sampled[:-1]) - ) # stochastic + assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic assert sampled.shape[0] == 100 p_d = approx.sample_node(p_t, size=i) sampled = p_d.eval({i: 100}) - assert any(map( - operator.ne, - sampled[1:], sampled[:-1]) - ) # deterministic + assert any(map(operator.ne, sampled[1:], sampled[:-1])) # deterministic assert sampled.shape[0] == 100 sampled = p_d.eval({i: 101}) assert sampled.shape[0] == 101 @@ -930,25 +988,27 @@ def test_discrete_not_allowed(): y = np.random.normal(mu_true[z_true], np.ones_like(z_true)) with pm.Model(): - mu = pm.Normal('mu', mu=0, sd=10, shape=3) - z = pm.Categorical('z', p=tt.ones(3) / 3, shape=len(y)) - pm.Normal('y_obs', mu=mu[z], sd=1., observed=y) + mu = pm.Normal("mu", mu=0, sd=10, shape=3) + z = pm.Categorical("z", p=tt.ones(3) / 3, shape=len(y)) + pm.Normal("y_obs", mu=mu[z], sd=1.0, observed=y) with pytest.raises(opvi.ParametrizationError): pm.fit(n=1) # fails def test_var_replacement(): X_mean = pm.floatX(np.linspace(0, 10, 10)) - y = pm.floatX(np.random.normal(X_mean*4, .05)) + y = pm.floatX(np.random.normal(X_mean * 4, 0.05)) with pm.Model(): - inp = pm.Normal('X', X_mean, shape=X_mean.shape) - coef = pm.Normal('b', 4.) + inp = pm.Normal("X", X_mean, shape=X_mean.shape) + coef = pm.Normal("b", 4.0) mean = inp * coef - pm.Normal('y', mean, .1, observed=y) + pm.Normal("y", mean, 0.1, observed=y) advi = pm.fit(100) - assert advi.sample_node(mean).eval().shape == (10, ) + assert advi.sample_node(mean).eval().shape == (10,) x_new = pm.floatX(np.linspace(0, 10, 11)) - assert advi.sample_node(mean, more_replacements={inp: x_new}).eval().shape == (11, ) + assert advi.sample_node(mean, more_replacements={inp: x_new}).eval().shape == ( + 11, + ) def test_empirical_from_trace(another_simple_model): @@ -964,30 +1024,31 @@ def test_empirical_from_trace(another_simple_model): @pytest.fixture( params=[ - dict(cls=flows.PlanarFlow, init=dict(jitter=.1)), - dict(cls=flows.RadialFlow, init=dict(jitter=.1)), - dict(cls=flows.ScaleFlow, init=dict(jitter=.1)), - dict(cls=flows.LocFlow, init=dict(jitter=.1)), - dict(cls=flows.HouseholderFlow, init=dict(jitter=.1)), + dict(cls=flows.PlanarFlow, init=dict(jitter=0.1)), + dict(cls=flows.RadialFlow, init=dict(jitter=0.1)), + dict(cls=flows.ScaleFlow, init=dict(jitter=0.1)), + dict(cls=flows.LocFlow, init=dict(jitter=0.1)), + dict(cls=flows.HouseholderFlow, init=dict(jitter=0.1)), ], - ids=lambda d: d['cls'].__name__ + ids=lambda d: d["cls"].__name__, ) def flow_spec(request): - cls = request.param['cls'] - init = request.param['init'] + cls = request.param["cls"] + init = request.param["init"] def init_(**kw): k = init.copy() k.update(kw) return cls(**k) + init_.cls = cls return init_ def test_flow_det(flow_spec): - z0 = tt.arange(0, 20).astype('float32') - flow = flow_spec(dim=20, z0=z0.dimshuffle('x', 0)) - with change_flags(compute_test_value='off'): + z0 = tt.arange(0, 20).astype("float32") + flow = flow_spec(dim=20, z0=z0.dimshuffle("x", 0)) + with change_flags(compute_test_value="off"): z1 = flow.forward.flatten() J = tt.jacobian(z1, z0) logJdet = tt.log(tt.abs_(tt.nlinalg.det(J))) @@ -996,14 +1057,14 @@ def test_flow_det(flow_spec): def test_flow_det_local(flow_spec): - z0 = tt.arange(0, 12).astype('float32') + z0 = tt.arange(0, 12).astype("float32") spec = flow_spec.cls.get_param_spec_for(d=12) params = dict() for k, shp in spec.items(): - params[k] = np.random.randn(1, *shp).astype('float32') + params[k] = np.random.randn(1, *shp).astype("float32") flow = flow_spec(dim=12, z0=z0.reshape((1, 1, 12)), **params) assert flow.batched - with change_flags(compute_test_value='off'): + with change_flags(compute_test_value="off"): z1 = flow.forward.flatten() J = tt.jacobian(z1, z0) logJdet = tt.log(tt.abs_(tt.nlinalg.det(J))) @@ -1017,18 +1078,20 @@ def test_flows_collect_chain(): flow2 = flows.PlanarFlow(dim=2, z0=flow1) assert len(flow2.params) == 3 assert len(flow2.all_params) == 6 - np.testing.assert_allclose(flow1.logdet.eval() + flow2.logdet.eval(), flow2.sum_logdets.eval()) + np.testing.assert_allclose( + flow1.logdet.eval() + flow2.logdet.eval(), flow2.sum_logdets.eval() + ) @pytest.mark.parametrize( - 'formula,length,order', + "formula,length,order", [ - ('planar', 1, [flows.PlanarFlow]), - ('planar*2', 2, [flows.PlanarFlow] * 2), - ('planar-planar', 2, [flows.PlanarFlow] * 2), - ('planar-planar*2', 3, [flows.PlanarFlow] * 3), - ('hh-planar*2', 3, [flows.HouseholderFlow]+[flows.PlanarFlow] * 2) - ] + ("planar", 1, [flows.PlanarFlow]), + ("planar*2", 2, [flows.PlanarFlow] * 2), + ("planar-planar", 2, [flows.PlanarFlow] * 2), + ("planar-planar*2", 3, [flows.PlanarFlow] * 3), + ("hh-planar*2", 3, [flows.HouseholderFlow] + [flows.PlanarFlow] * 2), + ], ) def test_flow_formula(formula, length, order): spec = flows.Formula(formula) diff --git a/pymc3/theanof.py b/pymc3/theanof.py index 2ea40632d0..66dbadd6ef 100644 --- a/pymc3/theanof.py +++ b/pymc3/theanof.py @@ -10,20 +10,22 @@ from .data import GeneratorAdapter from .vartypes import typefilter, continuous_types -__all__ = ['gradient', - 'hessian', - 'hessian_diag', - 'inputvars', - 'cont_inputs', - 'floatX', - 'smartfloatX', - 'jacobian', - 'CallableTensor', - 'join_nonshared_inputs', - 'make_shared_replacements', - 'generator', - 'set_tt_rng', - 'tt_rng'] +__all__ = [ + "gradient", + "hessian", + "hessian_diag", + "inputvars", + "cont_inputs", + "floatX", + "smartfloatX", + "jacobian", + "CallableTensor", + "join_nonshared_inputs", + "make_shared_replacements", + "generator", + "set_tt_rng", + "tt_rng", +] def inputvars(a): @@ -71,10 +73,11 @@ def smartfloatX(x): """ Convert non int types to floatX """ - if str(x.dtype).startswith('float'): + if str(x.dtype).startswith("float"): x = floatX(x) return x + """ Theano derivative functions """ @@ -82,10 +85,10 @@ def smartfloatX(x): def gradient1(f, v): """flat gradient of f wrt v""" - return tt.flatten(tt.grad(f, v, disconnected_inputs='warn')) + return tt.flatten(tt.grad(f, v, disconnected_inputs="warn")) -empty_gradient = tt.zeros(0, dtype='float32') +empty_gradient = tt.zeros(0, dtype="float32") def gradient(f, vars=None): @@ -101,7 +104,7 @@ def gradient(f, vars=None): def jacobian1(f, v): """jacobian of f wrt v""" f = tt.flatten(f) - idx = tt.arange(f.shape[0], dtype='int32') + idx = tt.arange(f.shape[0], dtype="int32") def grad_i(i): return gradient1(f[i], v) @@ -120,25 +123,25 @@ def jacobian(f, vars=None): def jacobian_diag(f, x): - idx = tt.arange(f.shape[0], dtype='int32') + idx = tt.arange(f.shape[0], dtype="int32") def grad_ii(i): return theano.grad(f[i], x)[i] - return theano.scan(grad_ii, sequences=[idx], - n_steps=f.shape[0], - name='jacobian_diag')[0] + return theano.scan( + grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag" + )[0] -@change_flags(compute_test_value='ignore') +@change_flags(compute_test_value="ignore") def hessian(f, vars=None): return -jacobian(gradient(f, vars), vars) -@change_flags(compute_test_value='ignore') +@change_flags(compute_test_value="ignore") def hessian_diag1(f, v): g = gradient1(f, v) - idx = tt.arange(g.shape[0], dtype='int32') + idx = tt.arange(g.shape[0], dtype="int32") def hess_ii(i): return gradient1(g[i], v)[i] @@ -146,7 +149,7 @@ def hess_ii(i): return theano.map(hess_ii, idx)[0] -@change_flags(compute_test_value='ignore') +@change_flags(compute_test_value="ignore") def hessian_diag(f, vars=None): if vars is None: vars = cont_inputs(f) @@ -165,7 +168,6 @@ def makeiter(a): class IdentityOp(scalar.UnaryScalarOp): - @staticmethod def st_impl(x): return x @@ -203,7 +205,10 @@ def make_shared_replacements(vars, model): Dict of variable -> new shared variable """ othervars = set(model.vars) - set(vars) - return {var: theano.shared(var.tag.test_value, var.name + '_shared') for var in othervars} + return { + var: theano.shared(var.tag.test_value, var.name + "_shared") + for var in othervars + } def join_nonshared_inputs(xs, vars, shared, make_shared=False): @@ -222,15 +227,15 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False): inarray : vector of inputs """ if not vars: - raise ValueError('Empty list of variables.') + raise ValueError("Empty list of variables.") joined = tt.concatenate([var.ravel() for var in vars]) if not make_shared: tensor_type = joined.type - inarray = tensor_type('inarray') + inarray = tensor_type("inarray") else: - inarray = theano.shared(joined.tag.test_value, 'inarray') + inarray = theano.shared(joined.tag.test_value, "inarray") ordering = ArrayOrdering(vars) inarray.tag.test_value = joined.tag.test_value @@ -238,7 +243,8 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False): get_var = {var.name: var for var in vars} replace = { get_var[var]: reshape_t(inarray[slc], shp).astype(dtyp) - for var, slc, shp, dtyp in ordering.vmap} + for var, slc, shp, dtyp in ordering.vmap + } replace.update(shared) @@ -273,8 +279,8 @@ def __call__(self, input): return theano.clone(self.tensor, {oldinput: input}, strict=False) -scalar_identity = IdentityOp(scalar.upgrade_to_float, name='scalar_identity') -identity = tt.Elemwise(scalar_identity, name='identity') +scalar_identity = IdentityOp(scalar.upgrade_to_float, name="scalar_identity") +identity = tt.Elemwise(scalar_identity, name="identity") class GeneratorOp(Op): @@ -295,7 +301,8 @@ class GeneratorOp(Op): and yields np.arrays with same types default : np.array with the same type as generator produces """ - __props__ = ('generator',) + + __props__ = ("generator",) def __init__(self, gen, default=None): super(GeneratorOp, self).__init__() @@ -317,13 +324,13 @@ def perform(self, node, inputs, output_storage, params=None): def do_constant_folding(self, node): return False - __call__ = change_flags(compute_test_value='off')(Op.__call__) + __call__ = change_flags(compute_test_value="off")(Op.__call__) def set_gen(self, gen): if not isinstance(gen, GeneratorAdapter): gen = GeneratorAdapter(gen) if not gen.tensortype == self.generator.tensortype: - raise ValueError('New generator should yield the same type') + raise ValueError("New generator should yield the same type") self.generator = gen def set_default(self, value): @@ -334,8 +341,9 @@ def set_default(self, value): t1 = (False,) * value.ndim t2 = self.generator.tensortype.broadcastable if not t1 == t2: - raise ValueError('Default value should have the ' - 'same type as generator') + raise ValueError( + "Default value should have the " "same type as generator" + ) self.default = value @@ -450,13 +458,14 @@ def ix_(*args): new = tt.as_tensor(new) if new.ndim != 1: raise ValueError("Cross index must be 1 dimensional") - new = new.reshape((1,)*k + (new.size,) + (1,)*(nd-k-1)) + new = new.reshape((1,) * k + (new.size,) + (1,) * (nd - k - 1)) out.append(new) return tuple(out) def largest_common_dtype(tensors): - dtypes = set(str(t.dtype) if hasattr(t, 'dtype') - else smartfloatX(np.asarray(t)).dtype - for t in tensors) + dtypes = set( + str(t.dtype) if hasattr(t, "dtype") else smartfloatX(np.asarray(t)).dtype + for t in tensors + ) return np.stack([np.ones((), dtype=dtype) for dtype in dtypes]).dtype diff --git a/pymc3/tuning/scaling.py b/pymc3/tuning/scaling.py index ffde751417..bf9d2dede2 100644 --- a/pymc3/tuning/scaling.py +++ b/pymc3/tuning/scaling.py @@ -1,16 +1,16 @@ -''' +""" Created on Mar 12, 2011 from __future__ import division @author: johnsalvatier -''' +""" import numpy as np from numpy import exp, log, sqrt from ..model import modelcontext, Point from ..theanof import hessian_diag, inputvars from ..blocking import DictToArrayBijection, ArrayOrdering -__all__ = ['approx_hessian', 'find_hessian', 'trace_cov', 'guess_scaling'] +__all__ = ["approx_hessian", "find_hessian", "trace_cov", "guess_scaling"] def approx_hessian(point, vars=None, model=None): @@ -39,11 +39,11 @@ def approx_hessian(point, vars=None, model=None): def grad_logp(point): return np.nan_to_num(dlogp(point)) - ''' + """ Find the jacobian of the gradient function at the current position this should be the Hessian; invert it to find the approximate covariance matrix. - ''' + """ return -Jacobian(grad_logp)(bij.map(point)) @@ -124,8 +124,8 @@ def adjust_scaling(s, scaling_bound): def adjust_precision(tau, scaling_bound=1e-8): mag = sqrt(abs(tau)) - bounded = bound(log(mag), log(scaling_bound), log(1./scaling_bound)) - return exp(bounded)**2 + bounded = bound(log(mag), log(scaling_bound), log(1.0 / scaling_bound)) + return exp(bounded) ** 2 def bound(a, l, u): diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 491c38b850..754bd45892 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -1,8 +1,8 @@ -''' +""" Created on Mar 12, 2011 @author: johnsalvatier -''' +""" from scipy.optimize import minimize import numpy as np from numpy import isfinite, nan_to_num @@ -18,12 +18,21 @@ import warnings from inspect import getargspec -__all__ = ['find_MAP'] - - -def find_MAP(start=None, vars=None, method="L-BFGS-B", - return_raw=False, include_transformed=True, progressbar=True, maxeval=5000, model=None, - *args, **kwargs): +__all__ = ["find_MAP"] + + +def find_MAP( + start=None, + vars=None, + method="L-BFGS-B", + return_raw=False, + include_transformed=True, + progressbar=True, + maxeval=5000, + model=None, + *args, + **kwargs +): """ Finds the local maximum a posteriori point given a model. @@ -58,7 +67,9 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B", wrapped it inside pymc3.sample() and you should thus avoid this method. """ - warnings.warn('find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.') + warnings.warn( + "find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way." + ) model = modelcontext(model) if start is None: @@ -67,10 +78,12 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B", update_start_vals(start, model.test_point, model) if not set(start.keys()).issubset(model.named_vars.keys()): - extra_keys = ', '.join(set(start.keys()) - set(model.named_vars.keys())) - valid_keys = ', '.join(model.named_vars.keys()) - raise KeyError('Some start parameters do not appear in the model!\n' - 'Valid keys are: {}, but {} was supplied'.format(valid_keys, extra_keys)) + extra_keys = ", ".join(set(start.keys()) - set(model.named_vars.keys())) + valid_keys = ", ".join(model.named_vars.keys()) + raise KeyError( + "Some start parameters do not appear in the model!\n" + "Valid keys are: {}, but {} was supplied".format(valid_keys, extra_keys) + ) if vars is None: vars = model.cont_vars @@ -90,29 +103,37 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B", compute_gradient = False if disc_vars or not compute_gradient: - pm._log.warning("Warning: gradient not available." + - "(E.g. vars contains discrete variables). MAP " + - "estimates may not be accurate for the default " + - "parameters. Defaulting to non-gradient minimization " + - "'Powell'.") + pm._log.warning( + "Warning: gradient not available." + + "(E.g. vars contains discrete variables). MAP " + + "estimates may not be accurate for the default " + + "parameters. Defaulting to non-gradient minimization " + + "'Powell'." + ) method = "Powell" if "fmin" in kwargs: fmin = kwargs.pop("fmin") - warnings.warn('In future versions, set the optimization algorithm with a string. ' - 'For example, use `method="L-BFGS-B"` instead of ' - '`fmin=sp.optimize.fmin_l_bfgs_b"`.') + warnings.warn( + "In future versions, set the optimization algorithm with a string. " + 'For example, use `method="L-BFGS-B"` instead of ' + '`fmin=sp.optimize.fmin_l_bfgs_b"`.' + ) cost_func = CostFuncWrapper(maxeval, progressbar, logp_func) # Check to see if minimization function actually uses the gradient - if 'fprime' in getargspec(fmin).args: + if "fprime" in getargspec(fmin).args: + def grad_logp(point): return nan_to_num(-dlogp_func(point)) - opt_result = fmin(cost_func, bij.map(start), fprime=grad_logp, *args, **kwargs) + + opt_result = fmin( + cost_func, bij.map(start), fprime=grad_logp, *args, **kwargs + ) else: # Check to see if minimization function uses a starting value - if 'x0' in getargspec(fmin).args: + if "x0" in getargspec(fmin).args: opt_result = fmin(cost_func, bij.map(start), *args, **kwargs) else: opt_result = fmin(cost_func, *args, **kwargs) @@ -129,7 +150,9 @@ def grad_logp(point): cost_func = CostFuncWrapper(maxeval, progressbar, logp_func) try: - opt_result = minimize(cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs) + opt_result = minimize( + cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs + ) mx0 = opt_result["x"] # r -> opt_result cost_func.progress.total = cost_func.progress.n + 1 cost_func.progress.update() @@ -142,7 +165,9 @@ def grad_logp(point): cost_func.progress.close() vars = get_default_varnames(model.unobserved_RVs, include_transformed) - mx = {var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))} + mx = { + var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0))) + } if return_raw: return mx, opt_result @@ -171,11 +196,11 @@ def __init__(self, maxeval=5000, progressbar=True, logp_func=None, dlogp_func=No self.logp_func = logp_func if dlogp_func is None: self.use_gradient = False - self.desc = 'logp = {:,.5g}' + self.desc = "logp = {:,.5g}" else: self.dlogp_func = dlogp_func self.use_gradient = True - self.desc = 'logp = {:,.5g}, ||grad|| = {:,.5g}' + self.desc = "logp = {:,.5g}, ||grad|| = {:,.5g}" self.previous_x = None self.progress = tqdm(total=maxeval, disable=not progressbar) self.progress.n = 0 @@ -187,7 +212,7 @@ def __call__(self, x): neg_grad = self.dlogp_func(pm.floatX(x)) if np.all(np.isfinite(neg_grad)): self.previous_x = x - grad = nan_to_num(-1.0*neg_grad) + grad = nan_to_num(-1.0 * neg_grad) grad = grad.astype(np.float64) else: self.previous_x = x diff --git a/pymc3/util.py b/pymc3/util.py index 080a2121ed..934e4af82f 100644 --- a/pymc3/util.py +++ b/pymc3/util.py @@ -2,7 +2,7 @@ import functools from numpy import asscalar -LATEX_ESCAPE_RE = re.compile(r'(%|_|\$|#|&)', re.MULTILINE) +LATEX_ESCAPE_RE = re.compile(r"(%|_|\$|#|&)", re.MULTILINE) def escape_latex(strng): @@ -25,8 +25,8 @@ def escape_latex(strng): A string with LaTeX escaped """ if strng is None: - return u'None' - return LATEX_ESCAPE_RE.sub(r'\\\1', strng) + return u"None" + return LATEX_ESCAPE_RE.sub(r"\\\1", strng) def get_transformed_name(name, transform): @@ -62,7 +62,7 @@ def is_transformed_name(name): bool Boolean, whether the string could have been produced by `get_transormed_name` """ - return name.endswith('__') and name.count('_') >= 3 + return name.endswith("__") and name.count("_") >= 3 def get_untransformed_name(name): @@ -80,9 +80,8 @@ def get_untransformed_name(name): String with untransformed version of the name. """ if not is_transformed_name(name): - raise ValueError( - u'{} does not appear to be a transformed name'.format(name)) - return '_'.join(name.split('_')[:-3]) + raise ValueError(u"{} does not appear to be a transformed name".format(name)) + return "_".join(name.split("_")[:-3]) def get_default_varnames(var_iterator, include_transformed): @@ -112,19 +111,20 @@ def get_variable_name(variable): """ name = variable.name if name is None: - if hasattr(variable, 'get_parents'): + if hasattr(variable, "get_parents"): try: - names = [get_variable_name(item) - for item in variable.get_parents()[0].inputs] + names = [ + get_variable_name(item) for item in variable.get_parents()[0].inputs + ] # do not escape_latex these, since it is not idempotent - return 'f(%s)' % ',~'.join([n for n in names if isinstance(n, str)]) + return "f(%s)" % ",~".join([n for n in names if isinstance(n, str)]) except IndexError: pass value = variable.eval() if not value.shape: return asscalar(value) - return 'array' - return r'\text{%s}' % name + return "array" + return r"\text{%s}" % name def update_start_vals(a, b, model): @@ -137,16 +137,16 @@ def update_start_vals(a, b, model): for name in a: if is_transformed_name(tname) and get_untransformed_name(tname) == name: transform_func = [ - d.transformation for d in model.deterministics if d.name == name] + d.transformation for d in model.deterministics if d.name == name + ] if transform_func: - b[tname] = transform_func[0].forward_val( - a[name], point=b) + b[tname] = transform_func[0].forward_val(a[name], point=b) a.update({k: v for k, v in b.items() if k not in a}) def get_transformed(z): - if hasattr(z, 'transformed'): + if hasattr(z, "transformed"): z = z.transformed return z @@ -165,4 +165,5 @@ def enhanced(*args, **kwargs): else: newwrapper = functools.partial(wrapper, *args, **kwargs) return newwrapper + return enhanced diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py index 72c04cccc5..9ab68fe9eb 100644 --- a/pymc3/variational/__init__.py +++ b/pymc3/variational/__init__.py @@ -13,7 +13,7 @@ adam, adamax, norm_constraint, - total_norm_constraint + total_norm_constraint, ) from . import inference @@ -26,7 +26,7 @@ Inference, KLqp, ImplicitGradient, - fit + fit, ) from . import approximations @@ -35,13 +35,10 @@ FullRank, Empirical, NormalizingFlow, - sample_approx + sample_approx, ) from . import opvi -from .opvi import ( - Group, - Approximation -) +from .opvi import Group, Approximation # special from .stein import Stein diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 30779211ed..1c4cccb058 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -12,37 +12,31 @@ from pymc3.variational import flows -__all__ = [ - 'MeanField', - 'FullRank', - 'Empirical', - 'NormalizingFlow', - 'sample_approx' -] +__all__ = ["MeanField", "FullRank", "Empirical", "NormalizingFlow", "sample_approx"] @Group.register class MeanFieldGroup(Group): - R"""Mean Field approximation to the posterior where spherical Gaussian family + r"""Mean Field approximation to the posterior where spherical Gaussian family is fitted to minimize KL divergence from True posterior. It is assumed that latent space variables are uncorrelated that is the main drawback of the method """ - __param_spec__ = dict(mu=('d', ), rho=('d', )) - short_name = 'mean_field' - alias_names = frozenset(['mf']) + __param_spec__ = dict(mu=("d",), rho=("d",)) + short_name = "mean_field" + alias_names = frozenset(["mf"]) @node_property def mean(self): - return self.params_dict['mu'] + return self.params_dict["mu"] @node_property def rho(self): - return self.params_dict['rho'] + return self.params_dict["rho"] @node_property def cov(self): - var = rho2sd(self.rho)**2 + var = rho2sd(self.rho) ** 2 if self.batched: return batched_diag(var) else: @@ -52,12 +46,12 @@ def cov(self): def std(self): return rho2sd(self.rho) - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __init_group__(self, group): super(MeanFieldGroup, self).__init_group__(group) if not self._check_user_params(): self.shared_params = self.create_shared_params( - self._kwargs.get('start', None) + self._kwargs.get("start", None) ) self._finalize_init() @@ -76,10 +70,10 @@ def create_shared_params(self, start=None): if self.batched: start = np.tile(start, (self.bdim, 1)) rho = np.tile(rho, (self.bdim, 1)) - return {'mu': theano.shared( - pm.floatX(start), 'mu'), - 'rho': theano.shared( - pm.floatX(rho), 'rho')} + return { + "mu": theano.shared(pm.floatX(start), "mu"), + "rho": theano.shared(pm.floatX(rho), "rho"), + } @node_property def symbolic_random(self): @@ -104,16 +98,17 @@ class FullRankGroup(Group): MeanField approach correlations between variables are taken in account. The main drawback of the method is computational cost. """ - __param_spec__ = dict(mu=('d',), L_tril=('int(d * (d + 1) / 2)',)) - short_name = 'full_rank' - alias_names = frozenset(['fr']) - @change_flags(compute_test_value='off') + __param_spec__ = dict(mu=("d",), L_tril=("int(d * (d + 1) / 2)",)) + short_name = "full_rank" + alias_names = frozenset(["fr"]) + + @change_flags(compute_test_value="off") def __init_group__(self, group): super(FullRankGroup, self).__init_group__(group) if not self._check_user_params(): self.shared_params = self.create_shared_params( - self._kwargs.get('start', None) + self._kwargs.get("start", None) ) self._finalize_init() @@ -129,35 +124,29 @@ def create_shared_params(self, start=None): else: start = self.bij.map(start) n = self.ddim - L_tril = ( - np.eye(n) - [np.tril_indices(n)] - .astype(theano.config.floatX) - ) + L_tril = np.eye(n)[np.tril_indices(n)].astype(theano.config.floatX) if self.batched: start = np.tile(start, (self.bdim, 1)) L_tril = np.tile(L_tril, (self.bdim, 1)) - return {'mu': theano.shared(start, 'mu'), - 'L_tril': theano.shared(L_tril, 'L_tril')} + return { + "mu": theano.shared(start, "mu"), + "L_tril": theano.shared(L_tril, "L_tril"), + } @node_property def L(self): if self.batched: L = tt.zeros((self.ddim, self.ddim, self.bdim)) - L = tt.set_subtensor( - L[self.tril_indices], - self.params_dict['L_tril'].T) + L = tt.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"].T) L = L.dimshuffle(2, 0, 1) else: L = tt.zeros((self.ddim, self.ddim)) - L = tt.set_subtensor( - L[self.tril_indices], - self.params_dict['L_tril']) + L = tt.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"]) return L @node_property def mean(self): - return self.params_dict['mu'] + return self.params_dict["mu"] @node_property def cov(self): @@ -187,8 +176,10 @@ def tril_indices(self): def symbolic_logq_not_scaled(self): z = self.symbolic_random if self.batched: + def logq(z_b, mu_b, L_b): return pm.MvNormal.dist(mu=mu_b, chol=L_b).logp(z_b) + # it's gonna be so slow # scan is computed over batch and then summed up # output shape is (batch, samples) @@ -215,28 +206,29 @@ class EmpiricalGroup(Group): """Builds Approximation instance from a given trace, it has the same interface as variational approximation """ + supports_batched = False has_logq = False - __param_spec__ = dict(histogram=('s', 'd')) - short_name = 'empirical' + __param_spec__ = dict(histogram=("s", "d")) + short_name = "empirical" - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __init_group__(self, group): super(EmpiricalGroup, self).__init_group__(group) self._check_trace() if not self._check_user_params(spec_kw=dict(s=-1)): self.shared_params = self.create_shared_params( - trace=self._kwargs.get('trace', None), - size=self._kwargs.get('size', None), - jitter=self._kwargs.get('jitter', 1), - start=self._kwargs.get('start', None) + trace=self._kwargs.get("trace", None), + size=self._kwargs.get("size", None), + jitter=self._kwargs.get("jitter", 1), + start=self._kwargs.get("start", None), ) self._finalize_init() def create_shared_params(self, trace=None, size=None, jitter=1, start=None): if trace is None: if size is None: - raise opvi.ParametrizationError('Need `trace` or `size` to initialize') + raise opvi.ParametrizationError("Need `trace` or `size` to initialize") else: if start is None: start = self.model.test_point @@ -256,14 +248,14 @@ def create_shared_params(self, trace=None, size=None, jitter=1, start=None): for j in range(len(trace)): histogram[i] = self.bij.map(trace.point(j, t)) i += 1 - return dict(histogram=theano.shared(pm.floatX(histogram), 'histogram')) + return dict(histogram=theano.shared(pm.floatX(histogram), "histogram")) def _check_trace(self): - trace = self._kwargs.get('trace', None) - if (trace is not None - and not all([var.name in trace.varnames - for var in self.group])): - raise ValueError('trace has not all FreeRV in the group') + trace = self._kwargs.get("trace", None) + if trace is not None and not all( + [var.name in trace.varnames for var in self.group] + ): + raise ValueError("trace has not all FreeRV in the group") def randidx(self, size=None): if size is None: @@ -272,16 +264,16 @@ def randidx(self, size=None): if size.ndim < 1: size = size[None] elif size.ndim > 1: - raise ValueError('size ndim should be no more than 1d') + raise ValueError("size ndim should be no more than 1d") else: pass else: size = tuple(np.atleast_1d(size)) - return (self._rng - .uniform(size=size, - low=pm.floatX(0), - high=pm.floatX(self.histogram.shape[0]) - pm.floatX(1e-16)) - .astype('int32')) + return self._rng.uniform( + size=size, + low=pm.floatX(0), + high=pm.floatX(self.histogram.shape[0]) - pm.floatX(1e-16), + ).astype("int32") def _new_initial(self, size, deterministic, more_replacements=None): theano_condition_is_here = isinstance(deterministic, tt.Variable) @@ -289,14 +281,15 @@ def _new_initial(self, size, deterministic, more_replacements=None): return tt.switch( deterministic, tt.repeat( - self.mean.dimshuffle('x', 0), - size if size is not None else 1, -1), - self.histogram[self.randidx(size)]) + self.mean.dimshuffle("x", 0), size if size is not None else 1, -1 + ), + self.histogram[self.randidx(size)], + ) else: if deterministic: return tt.repeat( - self.mean.dimshuffle('x', 0), - size if size is not None else 1, -1) + self.mean.dimshuffle("x", 0), size if size is not None else 1, -1 + ) else: return self.histogram[self.randidx(size)] @@ -306,7 +299,7 @@ def symbolic_random(self): @property def histogram(self): - return self.params_dict['histogram'] + return self.params_dict["histogram"] @node_property def mean(self): @@ -314,7 +307,7 @@ def mean(self): @node_property def cov(self): - x = (self.histogram - self.mean) + x = self.histogram - self.mean return x.T.dot(x) / pm.floatX(self.histogram.shape[0]) @node_property @@ -323,14 +316,14 @@ def std(self): def __str__(self): if isinstance(self.histogram, theano.compile.SharedVariable): - shp = ', '.join(map(str, self.histogram.shape.eval())) + shp = ", ".join(map(str, self.histogram.shape.eval())) else: - shp = 'None, ' + str(self.ddim) - return '{cls}[{shp}]'.format(shp=shp, cls=self.__class__.__name__) + shp = "None, " + str(self.ddim) + return "{cls}[{shp}]".format(shp=shp, cls=self.__class__.__name__) class NormalizingFlowGroup(Group): - R"""Normalizing flow is a series of invertible transformations on initial distribution. + r"""Normalizing flow is a series of invertible transformations on initial distribution. .. math:: @@ -377,17 +370,17 @@ class NormalizingFlowGroup(Group): Improving Variational Auto-Encoders using Householder Flow arXiv:1611.09630 """ - default_flow = 'scale-loc' + default_flow = "scale-loc" - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __init_group__(self, group): super(NormalizingFlowGroup, self).__init_group__(group) # objects to be resolved # 1. string formula # 2. not changed default value # 3. Formula - formula = self._kwargs.get('flow', self._vfam) - jitter = self._kwargs.get('jitter', 1) + formula = self._kwargs.get("flow", self._vfam) + jitter = self._kwargs.get("jitter", 1) if formula is None or isinstance(formula, str): # case 1 and 2 has_params = self._check_user_params(f=formula) @@ -395,13 +388,15 @@ def __init_group__(self, group): # case 3 has_params = self._check_user_params(f=formula.formula) else: - raise TypeError('Wrong type provided for NormalizingFlow as `flow` argument, ' - 'expected Formula or string') + raise TypeError( + "Wrong type provided for NormalizingFlow as `flow` argument, " + "expected Formula or string" + ) if not has_params: if formula is None: formula = self.default_flow else: - formula = '-'.join( + formula = "-".join( flows.flow_for_params(self.user_params[i]).short_name for i in range(len(self.user_params)) ) @@ -424,31 +419,38 @@ def __init_group__(self, group): def _check_user_params(self, **kwargs): params = self._user_params = self.user_params - formula = kwargs.pop('f') + formula = kwargs.pop("f") if params is None: return False if formula is not None: - raise opvi.ParametrizationError('No formula is allowed if user params are provided') + raise opvi.ParametrizationError( + "No formula is allowed if user params are provided" + ) if not isinstance(params, dict): - raise TypeError('params should be a dict') + raise TypeError("params should be a dict") if not all(isinstance(k, int) for k in params.keys()): - raise TypeError('params should be a dict with `int` keys') + raise TypeError("params should be a dict with `int` keys") needed = set(range(len(params))) givens = set(params.keys()) if givens != needed: raise opvi.ParametrizationError( - 'Passed parameters do not have a needed set of keys, ' - 'they should be equal, needed {needed}, got {givens}'.format( - givens=list(sorted(givens)), needed='[0, 1, ..., %d]' % len(formula.flows))) + "Passed parameters do not have a needed set of keys, " + "they should be equal, needed {needed}, got {givens}".format( + givens=list(sorted(givens)), + needed="[0, 1, ..., %d]" % len(formula.flows), + ) + ) for i in needed: flow = flows.flow_for_params(params[i]) flow_keys = set(flow.__param_spec__) user_keys = set(params[i].keys()) if flow_keys != user_keys: raise opvi.ParametrizationError( - 'Passed parameters for flow `{i}` ({cls}) do not have a needed set of keys, ' - 'they should be equal, needed {needed}, got {givens}'.format( - givens=user_keys, needed=flow_keys, i=i, cls=flow.__name__)) + "Passed parameters for flow `{i}` ({cls}) do not have a needed set of keys, " + "they should be equal, needed {needed}, got {givens}".format( + givens=user_keys, needed=flow_keys, i=i, cls=flow.__name__ + ) + ) return True @property @@ -468,7 +470,7 @@ def shared_params(self): @shared_params.setter def shared_params(self, value): if self.user_params is not None: - raise AttributeError('Cannot set when having user params') + raise AttributeError("Cannot set when having user params") current = self.flow i = 0 current.shared_params = value[i] @@ -485,7 +487,7 @@ def params(self): def symbolic_logq_not_scaled(self): z0 = self.symbolic_initial q0 = pm.Normal.dist().logp(z0).sum(range(1, z0.ndim)) - return q0-self.flow.sum_logdets + return q0 - self.flow.sum_logdets @property def symbolic_random(self): @@ -526,15 +528,22 @@ def sample_approx(approx, draws=100, include_transformed=True): # single group shortcuts exported to user class SingleGroupApproximation(Approximation): """Base class for Single Group Approximation""" + _group_class = None def __init__(self, *args, **kwargs): - local_rv = kwargs.get('local_rv') + local_rv = kwargs.get("local_rv") groups = [self._group_class(None, *args, **kwargs)] if local_rv is not None: - groups.extend([Group([v], params=p, local=True, model=kwargs.get('model')) - for v, p in local_rv.items()]) - super(SingleGroupApproximation, self).__init__(groups, model=kwargs.get('model')) + groups.extend( + [ + Group([v], params=p, local=True, model=kwargs.get("model")) + for v, p in local_rv.items() + ] + ) + super(SingleGroupApproximation, self).__init__( + groups, model=kwargs.get("model") + ) def __getattr__(self, item): return getattr(self.groups[0], item) @@ -548,35 +557,45 @@ def __dir__(self): class MeanField(SingleGroupApproximation): __doc__ = """**Single Group Mean Field Approximation** - """ + str(MeanFieldGroup.__doc__) + """ + str( + MeanFieldGroup.__doc__ + ) _group_class = MeanFieldGroup class FullRank(SingleGroupApproximation): __doc__ = """**Single Group Full Rank Approximation** - """ + str(FullRankGroup.__doc__) + """ + str( + FullRankGroup.__doc__ + ) _group_class = FullRankGroup class Empirical(SingleGroupApproximation): __doc__ = """**Single Group Full Rank Approximation** - """ + str(EmpiricalGroup.__doc__) + """ + str( + EmpiricalGroup.__doc__ + ) _group_class = EmpiricalGroup def __init__(self, trace=None, size=None, **kwargs): - if kwargs.get('local_rv', None) is not None: - raise opvi.LocalGroupError('Empirical approximation does not support local variables') + if kwargs.get("local_rv", None) is not None: + raise opvi.LocalGroupError( + "Empirical approximation does not support local variables" + ) super(Empirical, self).__init__(trace=trace, size=size, **kwargs) class NormalizingFlow(SingleGroupApproximation): __doc__ = """**Single Group Normalizing Flow Approximation** - """ + str(NormalizingFlowGroup.__doc__) + """ + str( + NormalizingFlowGroup.__doc__ + ) _group_class = NormalizingFlowGroup def __init__(self, flow=NormalizingFlowGroup.default_flow, *args, **kwargs): - kwargs['flow'] = flow + kwargs["flow"] = flow super(NormalizingFlow, self).__init__(*args, **kwargs) diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py index 748062e777..057564eeaf 100644 --- a/pymc3/variational/callbacks.py +++ b/pymc3/variational/callbacks.py @@ -2,11 +2,7 @@ import numpy as np -__all__ = [ - 'Callback', - 'CheckParametersConvergence', - 'Tracker' -] +__all__ = ["Callback", "CheckParametersConvergence", "Tracker"] class Callback(object): @@ -22,10 +18,7 @@ def absolute(current, prev): return np.abs(current - prev) -_diff = dict( - relative=relative, - absolute=absolute -) +_diff = dict(relative=relative, absolute=absolute) class CheckParametersConvergence(Callback): @@ -54,8 +47,7 @@ class CheckParametersConvergence(Callback): ... ) """ - def __init__(self, every=100, tolerance=1e-3, - diff='relative', ord=np.inf): + def __init__(self, every=100, tolerance=1e-3, diff="relative", ord=np.inf): self._diff = _diff[diff] self.ord = ord self.every = every @@ -74,7 +66,7 @@ def __call__(self, approx, _, i): self.prev = current norm = np.linalg.norm(delta, self.ord) if norm < self.tolerance: - raise StopIteration('Convergence achieved at %d' % i) + raise StopIteration("Convergence achieved at %d" % i) @staticmethod def flatten_shared(shared_list): @@ -115,6 +107,7 @@ class Tracker(Callback): ... tracker = Tracker(some_stat=my_callable, time=time.time) ... approx = pm.fit(callbacks=[tracker]) """ + def __init__(self, **kwargs): self.whatchdict = kwargs self.hist = collections.defaultdict(list) diff --git a/pymc3/variational/flows.py b/pymc3/variational/flows.py index 13a88a6df0..2cb5050502 100644 --- a/pymc3/variational/flows.py +++ b/pymc3/variational/flows.py @@ -9,12 +9,12 @@ from . import opvi __all__ = [ - 'Formula', - 'PlanarFlow', - 'HouseholderFlow', - 'RadialFlow', - 'LocFlow', - 'ScaleFlow' + "Formula", + "PlanarFlow", + "HouseholderFlow", + "RadialFlow", + "LocFlow", + "ScaleFlow", ] @@ -39,9 +39,9 @@ class Formula(object): """ def __init__(self, formula): - identifiers = formula.lower().replace(' ', '').split('-') - self.formula = '-'.join(identifiers) - identifiers = [idf.split('*') for idf in identifiers] + identifiers = formula.lower().replace(" ", "").split("-") + self.formula = "-".join(identifiers) + identifiers = [idf.split("*") for idf in identifiers] self.flows = [] for tup in identifiers: @@ -50,25 +50,31 @@ def __init__(self, formula): elif len(tup) == 2: self.flows.extend([flow_for_short_name(tup[0])] * int(tup[1])) else: - raise ValueError('Wrong format: %s' % formula) + raise ValueError("Wrong format: %s" % formula) if len(self.flows) == 0: - raise ValueError('No flows in formula') + raise ValueError("No flows in formula") - def __call__(self, z0=None, dim=None, jitter=.001, params=None, batch_size=None): + def __call__(self, z0=None, dim=None, jitter=0.001, params=None, batch_size=None): if len(self.flows) == 0: - raise ValueError('No flows in formula') + raise ValueError("No flows in formula") if params is None: params = dict() flow = z0 for i, flow_cls in enumerate(self.flows): - flow = flow_cls(dim=dim, jitter=jitter, z0=flow, batch_size=batch_size, **params.get(i, {})) + flow = flow_cls( + dim=dim, + jitter=jitter, + z0=flow, + batch_size=batch_size, + **params.get(i, {}) + ) return flow def __reduce__(self): return self.__class__, self.formula def __latex__(self): - return r'Formula{\mathcal{N}(0, 1) -> %s}' % self.formula + return r"Formula{\mathcal{N}(0, 1) -> %s}" % self.formula __repr__ = _latex_repr_ = __latex__ @@ -103,35 +109,43 @@ def seems_like_flow_params(params): class AbstractFlow(WithMemoization): shared_params = None __param_spec__ = dict() - short_name = '' + short_name = "" __param_registry = dict() __name_registry = dict() @classmethod def register(cls, sbcls): - assert frozenset(sbcls.__param_spec__) not in cls.__param_registry, 'Duplicate __param_spec__' + assert ( + frozenset(sbcls.__param_spec__) not in cls.__param_registry + ), "Duplicate __param_spec__" cls.__param_registry[frozenset(sbcls.__param_spec__)] = sbcls - assert sbcls.short_name not in cls.__name_registry, 'Duplicate short_name' + assert sbcls.short_name not in cls.__name_registry, "Duplicate short_name" cls.__name_registry[sbcls.short_name] = sbcls return sbcls @classmethod def flow_for_params(cls, params): if frozenset(params) not in cls.__param_registry: - raise KeyError('No such flow for the following params: {!r}, ' - 'only the following are supported\n\n{}' - .format(params, cls.__param_registry)) + raise KeyError( + "No such flow for the following params: {!r}, " + "only the following are supported\n\n{}".format( + params, cls.__param_registry + ) + ) return cls.__param_registry[frozenset(params)] @classmethod def flow_for_short_name(cls, name): if name.lower() not in cls.__name_registry: - raise KeyError('No such flow: {!r}, ' - 'only the following are supported\n\n{}' - .format(name, cls.__name_registry)) + raise KeyError( + "No such flow: {!r}, " + "only the following are supported\n\n{}".format( + name, cls.__name_registry + ) + ) return cls.__name_registry[name.lower()] - def __init__(self, z0=None, dim=None, jitter=.001, batch_size=None, local=False): + def __init__(self, z0=None, dim=None, jitter=0.001, batch_size=None, local=False): self.local = local self.batch_size = batch_size self.__jitter = jitter @@ -144,29 +158,35 @@ def __init__(self, z0=None, dim=None, jitter=.001, batch_size=None, local=False) if dim is not None: self.dim = dim else: - raise ValueError('Cannot infer dimension of flow, ' - 'please provide dim or Flow instance as z0') + raise ValueError( + "Cannot infer dimension of flow, " + "please provide dim or Flow instance as z0" + ) if z0 is None: self.z0 = tt.matrix() # type: tt.TensorVariable else: self.z0 = tt.as_tensor(z0) self.parent = parent - def add_param(self, user=None, name=None, ref=0., dtype='floatX'): - if dtype == 'floatX': + def add_param(self, user=None, name=None, ref=0.0, dtype="floatX"): + if dtype == "floatX": dtype = theano.config.floatX spec = self.__param_spec__[name] - shape = tuple(eval(s, {'d': self.dim}) for s in spec) + shape = tuple(eval(s, {"d": self.dim}) for s in spec) if user is None: if self.local: - raise opvi.LocalGroupError('Need parameters for local group flow') + raise opvi.LocalGroupError("Need parameters for local group flow") if self.batched: if self.batch_size is None: - raise opvi.BatchedGroupError('Need batch size to infer parameter shape') + raise opvi.BatchedGroupError( + "Need batch size to infer parameter shape" + ) shape = (self.batch_size,) + shape return theano.shared( - np.asarray(np.random.normal(size=shape) * self.__jitter + ref).astype(dtype), - name=name + np.asarray(np.random.normal(size=shape) * self.__jitter + ref).astype( + dtype + ), + name=name, ) else: @@ -191,7 +211,7 @@ def all_params(self): return params @property - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def sum_logdets(self): dets = [self.logdet] current = self @@ -208,13 +228,13 @@ def forward(self): def logdet(self): raise NotImplementedError - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def forward_pass(self, z0): ret = theano.clone(self.forward, {self.root.z0: z0}) try: - ret.tag.test_value = np.random.normal( - size=z0.tag.test_value.shape - ).astype(self.z0.dtype) + ret.tag.test_value = np.random.normal(size=z0.tag.test_value.shape).astype( + self.z0.dtype + ) except AttributeError: ret.tag.test_value = self.root.z0.tag.test_value return ret @@ -234,7 +254,7 @@ def formula(self): current = self while not current.isroot: current = current.parent - f = current.short_name + '-' + f + f = current.short_name + "-" + f return f @property @@ -253,7 +273,7 @@ def get_param_spec_for(cls, **kwargs): return res def __repr__(self): - return 'Flow{%s}' % self.short_name + return "Flow{%s}" % self.short_name def __str__(self): return self.short_name @@ -281,49 +301,49 @@ def __call__(self, *args): class LinearFlow(AbstractFlow): - __param_spec__ = dict(u=('d', ), w=('d', ), b=()) + __param_spec__ = dict(u=("d",), w=("d",), b=()) - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __init__(self, h, u=None, w=None, b=None, **kwargs): self.h = h super(LinearFlow, self).__init__(**kwargs) - u = self.add_param(u, 'u') - w = self.add_param(w, 'w') - b = self.add_param(b, 'b') + u = self.add_param(u, "u") + w = self.add_param(w, "w") + b = self.add_param(b, "b") self.shared_params = dict(u=u, w=w, b=b) self.u_, self.w_ = self.make_uw(self.u, self.w) - u = property(lambda self: self.shared_params['u']) - w = property(lambda self: self.shared_params['w']) - b = property(lambda self: self.shared_params['b']) + u = property(lambda self: self.shared_params["u"]) + w = property(lambda self: self.shared_params["w"]) + b = property(lambda self: self.shared_params["b"]) def make_uw(self, u, w): - raise NotImplementedError('Need to implement valid U, W transform') + raise NotImplementedError("Need to implement valid U, W transform") @node_property def forward(self): z = self.z0 # sxd - u = self.u_ # d - w = self.w_ # d - b = self.b # . - h = self.h # f + u = self.u_ # d + w = self.w_ # d + b = self.b # . + h = self.h # f # h(sxd \dot d + .) = s if not self.batched: hwz = h(z.dot(w) + b) # s # sxd + (s \outer d) = sxd - z1 = z + tt.outer(hwz, u) # sxd + z1 = z + tt.outer(hwz, u) # sxd return z1 else: z = z.swapaxes(0, 1) # z bxsxd # u bxd # w bxd - b = b.dimshuffle(0, 'x') + b = b.dimshuffle(0, "x") # b bx- hwz = h(tt.batched_dot(z, w) + b) # bxs # bxsxd + (bxsx- * bx-xd) = bxsxd - hwz = hwz.dimshuffle(0, 1, 'x') # bxsx- - u = u.dimshuffle(0, 'x', 1) # bx-xd + hwz = hwz.dimshuffle(0, 1, "x") # bxsx- + u = u.dimshuffle(0, "x", 1) # bx-xd z1 = z + hwz * u # bxsxd return z1.swapaxes(0, 1) # sxbxd @@ -336,21 +356,23 @@ def logdet(self): deriv = self.h.deriv # f' if not self.batched: # f'(sxd \dot d + .) * -xd = sxd - phi = deriv(z.dot(w) + b).dimshuffle(0, 'x') * w.dimshuffle('x', 0) + phi = deriv(z.dot(w) + b).dimshuffle(0, "x") * w.dimshuffle("x", 0) # \abs(. + sxd \dot d) = s - det = tt.abs_(1. + phi.dot(u)) + det = tt.abs_(1.0 + phi.dot(u)) return tt.log(det) else: z = z.swapaxes(0, 1) - b = b.dimshuffle(0, 'x') + b = b.dimshuffle(0, "x") # z bxsxd # u bxd # w bxd # b bx-x- # f'(bxsxd \bdot bxd + bx-x-) * bx-xd = bxsxd - phi = deriv(tt.batched_dot(z, w) + b).dimshuffle(0, 1, 'x') * w.dimshuffle(0, 'x', 1) + phi = deriv(tt.batched_dot(z, w) + b).dimshuffle(0, 1, "x") * w.dimshuffle( + 0, "x", 1 + ) # \abs(. + bxsxd \bdot bxd) = bxs - det = tt.abs_(1. + tt.batched_dot(phi, u)) # bxs + det = tt.abs_(1.0 + tt.batched_dot(phi, u)) # bxs return tt.log(det).sum(0) # s @@ -361,12 +383,12 @@ class Tanh(FlowFn): @staticmethod def deriv(*args): x, = args - return 1. - tt.tanh(x) ** 2 + return 1.0 - tt.tanh(x) ** 2 @AbstractFlow.register class PlanarFlow(LinearFlow): - short_name = 'planar' + short_name = "planar" def __init__(self, **kwargs): super(PlanarFlow, self).__init__(h=Tanh(), **kwargs) @@ -376,55 +398,44 @@ def make_uw(self, u, w): # u_ : d # w_ : d wu = u.dot(w) # . - mwu = -1. + tt.nnet.softplus(wu) # . + mwu = -1.0 + tt.nnet.softplus(wu) # . # d + (. - .) * d / . - u_h = ( - u+(mwu-wu) * - w/((w**2).sum()+1e-10) - ) + u_h = u + (mwu - wu) * w / ((w ** 2).sum() + 1e-10) return u_h, w else: # u_ : bxd # w_ : bxd - wu = (u*w).sum(-1, keepdims=True) # bx- - mwu = -1. + tt.nnet.softplus(wu) # bx- + wu = (u * w).sum(-1, keepdims=True) # bx- + mwu = -1.0 + tt.nnet.softplus(wu) # bx- # bxd + (bx- - bx-) * bxd / bx- = bxd - u_h = ( - u - + (mwu - wu) - * w / ((w ** 2).sum(-1, keepdims=True) + 1e-10) - ) + u_h = u + (mwu - wu) * w / ((w ** 2).sum(-1, keepdims=True) + 1e-10) return u_h, w class ReferencePointFlow(AbstractFlow): - __param_spec__ = dict(a=(), b=(), z_ref=('d', )) + __param_spec__ = dict(a=(), b=(), z_ref=("d",)) - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __init__(self, h, a=None, b=None, z_ref=None, **kwargs): super(ReferencePointFlow, self).__init__(**kwargs) - a = self.add_param(a, 'a') - b = self.add_param(b, 'b') - if hasattr(self.z0, 'tag') and hasattr(self.z0.tag, 'test_value'): + a = self.add_param(a, "a") + b = self.add_param(b, "b") + if hasattr(self.z0, "tag") and hasattr(self.z0.tag, "test_value"): z_ref = self.add_param( - z_ref, 'z_ref', - ref=self.z0.tag.test_value[0], - dtype=self.z0.dtype + z_ref, "z_ref", ref=self.z0.tag.test_value[0], dtype=self.z0.dtype ) else: - z_ref = self.add_param( - z_ref, 'z_ref', dtype=self.z0.dtype - ) + z_ref = self.add_param(z_ref, "z_ref", dtype=self.z0.dtype) self.h = h self.shared_params = dict(a=a, b=b, z_ref=z_ref) self.a_, self.b_ = self.make_ab(self.a, self.b) - a = property(lambda self: self.shared_params['a']) - b = property(lambda self: self.shared_params['b']) - z_ref = property(lambda self: self.shared_params['z_ref']) + a = property(lambda self: self.shared_params["a"]) + b = property(lambda self: self.shared_params["b"]) + z_ref = property(lambda self: self.shared_params["z_ref"]) def make_ab(self, a, b): - raise NotImplementedError('Need to specify how to get a, b') + raise NotImplementedError("Need to specify how to get a, b") @node_property def forward(self): @@ -439,13 +450,13 @@ def forward(self): # z bxsxd # z_ref bx-xd z = z.swapaxes(0, 1) - a = a.dimshuffle(0, 'x', 'x') - b = b.dimshuffle(0, 'x', 'x') - z_ref = z_ref.dimshuffle(0, 'x', 1) + a = a.dimshuffle(0, "x", "x") + b = b.dimshuffle(0, "x", "x") + z_ref = z_ref.dimshuffle(0, "x", 1) r = (z - z_ref).norm(2, axis=-1, keepdims=True) # sx- (bxsx-) # global: sxd + . * h(., sx-) * (sxd - sxd) = sxd # local: bxsxd + b * h(b, bxsx-) * (bxsxd - bxsxd) = bxsxd - z1 = z + b * h(a, r) * (z-z_ref) + z1 = z + b * h(a, r) * (z - z_ref) if self.batched: z1 = z1.swapaxes(0, 1) return z1 @@ -461,9 +472,9 @@ def logdet(self): deriv = self.h.deriv # h'(a, r) if self.batched: z = z.swapaxes(0, 1) - a = a.dimshuffle(0, 'x', 'x') - b = b.dimshuffle(0, 'x', 'x') - z_ref = z_ref.dimshuffle(0, 'x', 1) + a = a.dimshuffle(0, "x", "x") + b = b.dimshuffle(0, "x", "x") + z_ref = z_ref.dimshuffle(0, "x", 1) # a bx-x- # b bx-x- # z bxsxd @@ -471,7 +482,7 @@ def logdet(self): r = (z - z_ref).norm(2, axis=-1, keepdims=True) # s har = h(a, r) dar = deriv(a, r) - logdet = tt.log((1. + b*har)**(d-1.) * (1. + b*har + b*dar*r)) + logdet = tt.log((1.0 + b * har) ** (d - 1.0) * (1.0 + b * har + b * dar * r)) if self.batched: return logdet.sum([0, -1]) else: @@ -482,22 +493,22 @@ class Radial(FlowFn): @staticmethod def fn(*args): a, r = args - return 1./(a+r) + return 1.0 / (a + r) @staticmethod def inv(*args): a, y = args - return 1./y - a + return 1.0 / y - a @staticmethod def deriv(*args): a, r = args - return -1. / (a + r) ** 2 + return -1.0 / (a + r) ** 2 @AbstractFlow.register class RadialFlow(ReferencePointFlow): - short_name = 'radial' + short_name = "radial" def __init__(self, **kwargs): super(RadialFlow, self).__init__(Radial(), **kwargs) @@ -510,15 +521,15 @@ def make_ab(self, a, b): @AbstractFlow.register class LocFlow(AbstractFlow): - __param_spec__ = dict(loc=('d', )) - short_name = 'loc' + __param_spec__ = dict(loc=("d",)) + short_name = "loc" def __init__(self, loc=None, **kwargs): super(LocFlow, self).__init__(**kwargs) - loc = self.add_param(loc, 'loc') + loc = self.add_param(loc, "loc") self.shared_params = dict(loc=loc) - loc = property(lambda self: self.shared_params['loc']) + loc = property(lambda self: self.shared_params["loc"]) @node_property def forward(self): @@ -533,17 +544,17 @@ def logdet(self): @AbstractFlow.register class ScaleFlow(AbstractFlow): - __param_spec__ = dict(rho=('d', )) - short_name = 'scale' + __param_spec__ = dict(rho=("d",)) + short_name = "scale" - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __init__(self, rho=None, **kwargs): super(ScaleFlow, self).__init__(**kwargs) - rho = self.add_param(rho, 'rho') + rho = self.add_param(rho, "rho") self.scale = rho2sd(rho) self.shared_params = dict(rho=rho) - log_scale = property(lambda self: self.shared_params['log_scale']) + log_scale = property(lambda self: self.shared_params["log_scale"]) @node_property def forward(self): @@ -558,28 +569,28 @@ def logdet(self): @AbstractFlow.register class HouseholderFlow(AbstractFlow): - __param_spec__ = dict(v=('d', )) - short_name = 'hh' + __param_spec__ = dict(v=("d",)) + short_name = "hh" - @change_flags(compute_test_value='raise') + @change_flags(compute_test_value="raise") def __init__(self, v=None, **kwargs): super(HouseholderFlow, self).__init__(**kwargs) - v = self.add_param(v, 'v') + v = self.add_param(v, "v") self.shared_params = dict(v=v) if self.batched: - vv = v.dimshuffle(0, 1, 'x') * v.dimshuffle(0, 'x', 1) - I = tt.eye(self.dim).dimshuffle('x', 0, 1) - vvn = (1e-10+(v**2).sum(-1)).dimshuffle(0, 'x', 'x') + vv = v.dimshuffle(0, 1, "x") * v.dimshuffle(0, "x", 1) + I = tt.eye(self.dim).dimshuffle("x", 0, 1) + vvn = (1e-10 + (v ** 2).sum(-1)).dimshuffle(0, "x", "x") else: vv = tt.outer(v, v) I = tt.eye(self.dim) - vvn = ((v**2).sum(-1)+1e-10) - self.H = I - 2. * vv / vvn + vvn = (v ** 2).sum(-1) + 1e-10 + self.H = I - 2.0 * vv / vvn @node_property def forward(self): z = self.z0 # sxd - H = self.H # dxd + H = self.H # dxd if self.batched: return tt.batched_dot(z.swapaxes(0, 1), H).swapaxes(0, 1) else: diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 66a2198bf2..15f44fae57 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -10,7 +10,10 @@ import pymc3 as pm from pymc3.variational import test_functions from pymc3.variational.approximations import ( - MeanField, FullRank, Empirical, NormalizingFlow + MeanField, + FullRank, + Empirical, + NormalizingFlow, ) from pymc3.variational.operators import KL, KSD from . import opvi @@ -18,22 +21,22 @@ logger = logging.getLogger(__name__) __all__ = [ - 'ADVI', - 'FullRankADVI', - 'SVGD', - 'ASVGD', - 'NFVI', - 'Inference', - 'ImplicitGradient', - 'KLqp', - 'fit' + "ADVI", + "FullRankADVI", + "SVGD", + "ASVGD", + "NFVI", + "Inference", + "ImplicitGradient", + "KLqp", + "fit", ] -State = collections.namedtuple('State', 'i,step,callbacks,score') +State = collections.namedtuple("State", "i,step,callbacks,score") class Inference(object): - R"""**Base class for Variational Inference** + r"""**Base class for Variational Inference** Communicates Operator, Approximation and Test Function to build Objective Function @@ -59,9 +62,10 @@ def _maybe_score(self, score): if score is None: score = returns_loss elif score and not returns_loss: - warnings.warn('method `fit` got `score == True` but %s ' - 'does not return loss. Ignoring `score` argument' - % self.objective.op) + warnings.warn( + "method `fit` got `score == True` but %s " + "does not return loss. Ignoring `score` argument" % self.objective.op + ) score = False else: pass @@ -69,11 +73,10 @@ def _maybe_score(self, score): def run_profiling(self, n=1000, score=None, **kwargs): score = self._maybe_score(score) - fn_kwargs = kwargs.pop('fn_kwargs', dict()) - fn_kwargs['profile'] = True + fn_kwargs = kwargs.pop("fn_kwargs", dict()) + fn_kwargs["profile"] = True step_func = self.objective.step_function( - score=score, fn_kwargs=fn_kwargs, - **kwargs + score=score, fn_kwargs=fn_kwargs, **kwargs ) progress = tqdm.trange(n) try: @@ -85,8 +88,7 @@ def run_profiling(self, n=1000, score=None, **kwargs): progress.close() return step_func.profile - def fit(self, n=10000, score=None, callbacks=None, progressbar=True, - **kwargs): + def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs): """Perform Operator Variational Inference Parameters @@ -158,37 +160,40 @@ def _iterate_without_loss(self, s, _, step_func, progress, callbacks): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(np.isnan(current_param))[0] - errmsg = ['NaN occurred in optimization. '] - suggest_solution = 'Try tracking this parameter: ' \ - 'http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters' + errmsg = ["NaN occurred in optimization. "] + suggest_solution = ( + "Try tracking this parameter: " + "http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters" + ) try: for ii in index: - errmsg.append('The current approximation of RV `{}`.ravel()[{}]' - ' is NaN.'.format(*name_slc[ii])) + errmsg.append( + "The current approximation of RV `{}`.ravel()[{}]" + " is NaN.".format(*name_slc[ii]) + ) errmsg.append(suggest_solution) except IndexError: pass - raise FloatingPointError('\n'.join(errmsg)) + raise FloatingPointError("\n".join(errmsg)) for callback in callbacks: - callback(self.approx, None, i+s+1) + callback(self.approx, None, i + s + 1) except (KeyboardInterrupt, StopIteration) as e: progress.close() if isinstance(e, StopIteration): logger.info(str(e)) finally: progress.close() - return State(i+s, step=step_func, - callbacks=callbacks, - score=False) + return State(i + s, step=step_func, callbacks=callbacks, score=False) def _iterate_with_loss(self, s, n, step_func, progress, callbacks): def _infmean(input_array): """Return the mean of the finite values of the array""" - input_array = input_array[np.isfinite(input_array)].astype('float64') + input_array = input_array[np.isfinite(input_array)].astype("float64") if len(input_array) == 0: return np.nan else: return np.mean(input_array) + scores = np.empty(n) scores[:] = np.nan i = 0 @@ -207,26 +212,29 @@ def _infmean(input_array): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(np.isnan(current_param))[0] - errmsg = ['NaN occurred in optimization. '] - suggest_solution = 'Try tracking this parameter: ' \ - 'http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters' + errmsg = ["NaN occurred in optimization. "] + suggest_solution = ( + "Try tracking this parameter: " + "http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters" + ) try: for ii in index: - errmsg.append('The current approximation of RV `{}`.ravel()[{}]' - ' is NaN.'.format(*name_slc[ii])) + errmsg.append( + "The current approximation of RV `{}`.ravel()[{}]" + " is NaN.".format(*name_slc[ii]) + ) errmsg.append(suggest_solution) except IndexError: pass - raise FloatingPointError('\n'.join(errmsg)) + raise FloatingPointError("\n".join(errmsg)) scores[i] = e if i % 10 == 0: - avg_loss = _infmean(scores[max(0, i - 1000):i + 1]) - progress.set_description('Average Loss = {:,.5g}'.format(avg_loss)) - avg_loss = scores[max(0, i - 1000):i + 1].mean() - progress.set_description( - 'Average Loss = {:,.5g}'.format(avg_loss)) + avg_loss = _infmean(scores[max(0, i - 1000) : i + 1]) + progress.set_description("Average Loss = {:,.5g}".format(avg_loss)) + avg_loss = scores[max(0, i - 1000) : i + 1].mean() + progress.set_description("Average Loss = {:,.5g}".format(avg_loss)) for callback in callbacks: - callback(self.approx, scores[:i + 1], i+s+1) + callback(self.approx, scores[: i + 1], i + s + 1) except (KeyboardInterrupt, StopIteration) as e: # pragma: no cover # do not print log on the same line progress.close() @@ -234,32 +242,34 @@ def _infmean(input_array): if isinstance(e, StopIteration): logger.info(str(e)) if n < 10: - logger.info('Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}'.format( - i, 100 * i // n, scores[i])) + logger.info( + "Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}".format( + i, 100 * i // n, scores[i] + ) + ) else: - avg_loss = _infmean(scores[min(0, i - 1000):i + 1]) - logger.info('Interrupted at {:,d} [{:.0f}%]: Average Loss = {:,.5g}'.format( - i, 100 * i // n, avg_loss)) + avg_loss = _infmean(scores[min(0, i - 1000) : i + 1]) + logger.info( + "Interrupted at {:,d} [{:.0f}%]: Average Loss = {:,.5g}".format( + i, 100 * i // n, avg_loss + ) + ) else: if n < 10: - logger.info( - 'Finished [100%]: Loss = {:,.5g}'.format(scores[-1])) + logger.info("Finished [100%]: Loss = {:,.5g}".format(scores[-1])) else: - avg_loss = _infmean(scores[max(0, i - 1000):i + 1]) - logger.info( - 'Finished [100%]: Average Loss = {:,.5g}'.format(avg_loss)) + avg_loss = _infmean(scores[max(0, i - 1000) : i + 1]) + logger.info("Finished [100%]: Average Loss = {:,.5g}".format(avg_loss)) finally: progress.close() self.hist = np.concatenate([self.hist, scores]) - return State(i+s, step=step_func, - callbacks=callbacks, - score=True) + return State(i + s, step=step_func, callbacks=callbacks, score=True) def refine(self, n, progressbar=True): """Refine the solution using the last compiled step function """ if self.state is None: - raise TypeError('Need to call `.fit` first') + raise TypeError("Need to call `.fit` first") i, step, callbacks, score = self.state with tqdm.trange(n, disable=not progressbar) as progress: if score: @@ -293,12 +303,13 @@ class KLqp(Inference): Understanding disentangling in :math:`\beta`-VAE arXiv preprint 1804.03599 """ - def __init__(self, approx, beta=1.): + + def __init__(self, approx, beta=1.0): super(KLqp, self).__init__(KL, approx, None, beta=beta) class ADVI(KLqp): - R"""**Automatic Differentiation Variational Inference (ADVI)** + r"""**Automatic Differentiation Variational Inference (ADVI)** This class implements the meanfield ADVI, where the variational posterior distribution is assumed to be spherical Gaussian without @@ -446,7 +457,7 @@ def __init__(self, *args, **kwargs): class FullRankADVI(KLqp): - R"""**Full Rank Automatic Differentiation Variational Inference (ADVI)** + r"""**Full Rank Automatic Differentiation Variational Inference (ADVI)** Parameters ---------- @@ -491,17 +502,15 @@ class ImplicitGradient(Inference): only for large number of samples. Larger temperature is needed for small number of samples but there is no theoretical approach to choose the best one in such case. """ + def __init__(self, approx, estimator=KSD, kernel=test_functions.rbf, **kwargs): super(ImplicitGradient, self).__init__( - op=estimator, - approx=approx, - tf=kernel, - **kwargs + op=estimator, approx=approx, tf=kernel, **kwargs ) class SVGD(ImplicitGradient): - R"""**Stein Variational Gradient Descent** + r"""**Stein Variational Gradient Descent** This inference is based on Kernelized Stein Discrepancy it's main idea is to move initial noisy particles so that @@ -551,23 +560,33 @@ class SVGD(ImplicitGradient): arXiv:1704.02399 """ - def __init__(self, n_particles=100, jitter=1, model=None, start=None, - random_seed=None, estimator=KSD, kernel=test_functions.rbf, **kwargs): - if kwargs.get('local_rv') is not None: - raise opvi.AEVBInferenceError('SVGD does not support local groups') + def __init__( + self, + n_particles=100, + jitter=1, + model=None, + start=None, + random_seed=None, + estimator=KSD, + kernel=test_functions.rbf, + **kwargs + ): + if kwargs.get("local_rv") is not None: + raise opvi.AEVBInferenceError("SVGD does not support local groups") empirical = Empirical( - size=n_particles, jitter=jitter, - start=start, model=model, random_seed=random_seed) + size=n_particles, + jitter=jitter, + start=start, + model=model, + random_seed=random_seed, + ) super(SVGD, self).__init__( - approx=empirical, - estimator=estimator, - kernel=kernel, - **kwargs + approx=empirical, estimator=estimator, kernel=kernel, **kwargs ) class ASVGD(ImplicitGradient): - R"""**Amortized Stein Variational Gradient Descent** + r"""**Amortized Stein Variational Gradient Descent** **not suggested to use** @@ -612,38 +631,49 @@ class ASVGD(ImplicitGradient): """ def __init__(self, approx=None, estimator=KSD, kernel=test_functions.rbf, **kwargs): - warnings.warn('You are using experimental inference Operator. ' - 'It requires careful choice of temperature, default is 1. ' - 'Default temperature works well for low dimensional problems and ' - 'for significant `n_obj_mc`. Temperature > 1 gives more exploration ' - 'power to algorithm, < 1 leads to undesirable results. Please take ' - 'it in account when looking at inference result. Posterior variance ' - 'is often **underestimated** when using temperature = 1.') + warnings.warn( + "You are using experimental inference Operator. " + "It requires careful choice of temperature, default is 1. " + "Default temperature works well for low dimensional problems and " + "for significant `n_obj_mc`. Temperature > 1 gives more exploration " + "power to algorithm, < 1 leads to undesirable results. Please take " + "it in account when looking at inference result. Posterior variance " + "is often **underestimated** when using temperature = 1." + ) if approx is None: approx = FullRank( - model=kwargs.pop('model', None), - local_rv=kwargs.pop('local_rv', None) + model=kwargs.pop("model", None), local_rv=kwargs.pop("local_rv", None) ) super(ASVGD, self).__init__( - estimator=estimator, - approx=approx, - kernel=kernel, - **kwargs + estimator=estimator, approx=approx, kernel=kernel, **kwargs ) - def fit(self, n=10000, score=None, callbacks=None, progressbar=True, - obj_n_mc=500, **kwargs): + def fit( + self, + n=10000, + score=None, + callbacks=None, + progressbar=True, + obj_n_mc=500, + **kwargs + ): return super(ASVGD, self).fit( - n=n, score=score, callbacks=callbacks, - progressbar=progressbar, obj_n_mc=obj_n_mc, **kwargs) + n=n, + score=score, + callbacks=callbacks, + progressbar=progressbar, + obj_n_mc=obj_n_mc, + **kwargs + ) def run_profiling(self, n=1000, score=None, obj_n_mc=500, **kwargs): return super(ASVGD, self).run_profiling( - n=n, score=score, obj_n_mc=obj_n_mc, **kwargs) + n=n, score=score, obj_n_mc=obj_n_mc, **kwargs + ) class NFVI(KLqp): - R"""**Normalizing Flow based :class:`KLqp` inference** + r"""**Normalizing Flow based :class:`KLqp` inference** Normalizing flow is a series of invertible transformations on initial distribution. @@ -697,9 +727,17 @@ def __init__(self, *args, **kwargs): super(NFVI, self).__init__(NormalizingFlow(*args, **kwargs)) -def fit(n=10000, local_rv=None, method='advi', model=None, - random_seed=None, start=None, inf_kwargs=None, **kwargs): - R"""Handy shortcut for using inference methods in functional way +def fit( + n=10000, + local_rv=None, + method="advi", + model=None, + random_seed=None, + start=None, + inf_kwargs=None, + **kwargs +): + r"""Handy shortcut for using inference methods in functional way Parameters ---------- @@ -767,42 +805,33 @@ def fit(n=10000, local_rv=None, method='advi', model=None, else: inf_kwargs = inf_kwargs.copy() if local_rv is not None: - inf_kwargs['local_rv'] = local_rv + inf_kwargs["local_rv"] = local_rv if random_seed is not None: - inf_kwargs['random_seed'] = random_seed + inf_kwargs["random_seed"] = random_seed if start is not None: - inf_kwargs['start'] = start + inf_kwargs["start"] = start if model is None: model = pm.modelcontext(model) _select = dict( - advi=ADVI, - fullrank_advi=FullRankADVI, - svgd=SVGD, - asvgd=ASVGD, - nfvi=NFVI + advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD, asvgd=ASVGD, nfvi=NFVI ) if isinstance(method, str): method = method.lower() - if method.startswith('nfvi='): + if method.startswith("nfvi="): formula = method[5:] - inference = NFVI( - formula, - **inf_kwargs - ) + inference = NFVI(formula, **inf_kwargs) elif method in _select: - inference = _select[method]( - model=model, - **inf_kwargs - ) + inference = _select[method](model=model, **inf_kwargs) else: - raise KeyError('method should be one of %s ' - 'or Inference instance' % - set(_select.keys())) + raise KeyError( + "method should be one of %s " + "or Inference instance" % set(_select.keys()) + ) elif isinstance(method, Inference): inference = method else: - raise TypeError('method should be one of %s ' - 'or Inference instance' % - set(_select.keys())) + raise TypeError( + "method should be one of %s " "or Inference instance" % set(_select.keys()) + ) return inference.fit(n, **kwargs) diff --git a/pymc3/variational/operators.py b/pymc3/variational/operators.py index dc1a9f1226..64cc023366 100644 --- a/pymc3/variational/operators.py +++ b/pymc3/variational/operators.py @@ -5,14 +5,11 @@ from pymc3.variational.stein import Stein import pymc3 as pm -__all__ = [ - 'KL', - 'KSD' -] +__all__ = ["KL", "KSD"] class KL(Operator): - R"""**Operator based on Kullback Leibler Divergence** + r"""**Operator based on Kullback Leibler Divergence** This operator constructs Evidence Lower Bound (ELBO) objective @@ -35,18 +32,19 @@ class KL(Operator): Beta parameter for KL divergence, scales the regularization term. """ - def __init__(self, approx, beta=1.): + def __init__(self, approx, beta=1.0): Operator.__init__(self, approx) self.beta = pm.floatX(beta) def apply(self, f): return -self.datalogp_norm + self.beta * (self.logq_norm - self.varlogp_norm) + # SVGD Implementation class KSDObjective(ObjectiveFunction): - R"""Helper class for construction loss and updates for variational inference + r"""Helper class for construction loss and updates for variational inference Parameters ---------- @@ -58,10 +56,10 @@ class KSDObjective(ObjectiveFunction): def __init__(self, op, tf): if not isinstance(op, KSD): - raise opvi.ParametrizationError('Op should be KSD') + raise opvi.ParametrizationError("Op should be KSD") ObjectiveFunction.__init__(self, op, tf) - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __call__(self, nmc, **kwargs): op = self.op # type: KSD grad = op.apply(self.tf) @@ -69,17 +67,19 @@ def __call__(self, nmc, **kwargs): z = self.approx.joint_histogram else: z = self.approx.symbolic_random - if 'more_obj_params' in kwargs: - params = self.obj_params + kwargs['more_obj_params'] + if "more_obj_params" in kwargs: + params = self.obj_params + kwargs["more_obj_params"] else: - params = self.test_params + kwargs['more_tf_params'] + params = self.test_params + kwargs["more_tf_params"] grad *= pm.floatX(-1) grads = tt.grad(None, params, known_grads={z: grad}) - return self.approx.set_size_and_deterministic(grads, nmc, 0, kwargs.get('more_replacements')) + return self.approx.set_size_and_deterministic( + grads, nmc, 0, kwargs.get("more_replacements") + ) class KSD(Operator): - R"""**Operator based on Kernelized Stein Discrepancy** + r"""**Operator based on Kernelized Stein Discrepancy** Input: A target distribution with density function :math:`p(x)` and a set of initial particles :math:`\{x^0_i\}^n_{i=1}` @@ -120,5 +120,6 @@ def apply(self, f): approx=self.approx, kernel=f, use_histogram=self.approx.all_histograms, - temperature=self.temperature) + temperature=self.temperature, + ) return pm.floatX(-1) * stein.grad diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index 7b97c25e4d..da3738aad7 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -1,4 +1,4 @@ -R""" +r""" Variational inference is a great approach for doing really complex, often intractable Bayesian inference in approximate form. Common methods (e.g. ADVI) lack from complexity so that approximate posterior does not @@ -42,21 +42,13 @@ import pymc3 as pm from pymc3.util import get_transformed from .updates import adagrad_window -from ..blocking import ( - ArrayOrdering, DictToArrayBijection, VarMap -) +from ..blocking import ArrayOrdering, DictToArrayBijection, VarMap from ..model import modelcontext from ..theanof import tt_rng, change_flags, identity from ..util import get_default_varnames from ..memoize import WithMemoization, memoize -__all__ = [ - 'ObjectiveFunction', - 'Operator', - 'TestFunction', - 'Group', - 'Approximation' -] +__all__ = ["ObjectiveFunction", "Operator", "TestFunction", "Group", "Approximation"] class VariationalInferenceError(Exception): @@ -96,7 +88,9 @@ def inner(*args, **kwargs): res = f(*args, **kwargs) res.name = name return res + return inner + return wrap @@ -106,13 +100,19 @@ def node_property(f): if isinstance(f, str): def wrapper(fn): - return property(memoize(change_flags(compute_test_value='off')(append_name(f)(fn)), bound=True)) + return property( + memoize( + change_flags(compute_test_value="off")(append_name(f)(fn)), + bound=True, + ) + ) + return wrapper else: - return property(memoize(change_flags(compute_test_value='off')(f), bound=True)) + return property(memoize(change_flags(compute_test_value="off")(f), bound=True)) -@change_flags(compute_test_value='ignore') +@change_flags(compute_test_value="ignore") def try_to_set_test_value(node_in, node_out, s): _s = s if s is None: @@ -123,8 +123,8 @@ def try_to_set_test_value(node_in, node_out, s): if not isinstance(node_out, (list, tuple)): node_out = [node_out] for i, o in zip(node_in, node_out): - if hasattr(i.tag, 'test_value'): - if not hasattr(s.tag, 'test_value'): + if hasattr(i.tag, "test_value"): + if not hasattr(s.tag, "test_value"): continue else: tv = i.tag.test_value[None, ...] @@ -137,11 +137,12 @@ def try_to_set_test_value(node_in, node_out, s): class ObjectiveUpdates(theano.OrderedUpdates): """OrderedUpdates extension for storing loss """ + loss = None def _warn_not_used(smth, where): - warnings.warn('`%s` is not used for %s and ignored' % (smth, where)) + warnings.warn("`%s` is not used for %s and ignored" % (smth, where)) class ObjectiveFunction(object): @@ -163,9 +164,18 @@ def __init__(self, op, tf): test_params = property(lambda self: self.tf.params) approx = property(lambda self: self.op.approx) - def updates(self, obj_n_mc=None, tf_n_mc=None, obj_optimizer=adagrad_window, test_optimizer=adagrad_window, - more_obj_params=None, more_tf_params=None, more_updates=None, - more_replacements=None, total_grad_norm_constraint=None): + def updates( + self, + obj_n_mc=None, + tf_n_mc=None, + obj_optimizer=adagrad_window, + test_optimizer=adagrad_window, + more_obj_params=None, + more_tf_params=None, + more_updates=None, + more_replacements=None, + total_grad_norm_constraint=None, + ): """Calculate gradients for objective function, test function and then constructs updates for optimization step @@ -204,68 +214,90 @@ def updates(self, obj_n_mc=None, tf_n_mc=None, obj_optimizer=adagrad_window, tes test_optimizer=test_optimizer, more_tf_params=more_tf_params, more_replacements=more_replacements, - total_grad_norm_constraint=total_grad_norm_constraint + total_grad_norm_constraint=total_grad_norm_constraint, ) else: if tf_n_mc is not None: - _warn_not_used('tf_n_mc', self.op) + _warn_not_used("tf_n_mc", self.op) if more_tf_params: - _warn_not_used('more_tf_params', self.op) + _warn_not_used("more_tf_params", self.op) self.add_obj_updates( resulting_updates, obj_n_mc=obj_n_mc, obj_optimizer=obj_optimizer, more_obj_params=more_obj_params, more_replacements=more_replacements, - total_grad_norm_constraint=total_grad_norm_constraint + total_grad_norm_constraint=total_grad_norm_constraint, ) resulting_updates.update(more_updates) return resulting_updates - def add_test_updates(self, updates, tf_n_mc=None, test_optimizer=adagrad_window, - more_tf_params=None, more_replacements=None, - total_grad_norm_constraint=None): + def add_test_updates( + self, + updates, + tf_n_mc=None, + test_optimizer=adagrad_window, + more_tf_params=None, + more_replacements=None, + total_grad_norm_constraint=None, + ): if more_tf_params is None: more_tf_params = [] if more_replacements is None: more_replacements = dict() - tf_target = self(tf_n_mc, more_tf_params=more_tf_params, more_replacements=more_replacements) - grads = pm.updates.get_or_compute_grads(tf_target, self.obj_params + more_tf_params) + tf_target = self( + tf_n_mc, more_tf_params=more_tf_params, more_replacements=more_replacements + ) + grads = pm.updates.get_or_compute_grads( + tf_target, self.obj_params + more_tf_params + ) if total_grad_norm_constraint is not None: grads = pm.total_norm_constraint(grads, total_grad_norm_constraint) - updates.update( - test_optimizer( - grads, - self.test_params + - more_tf_params)) - - def add_obj_updates(self, updates, obj_n_mc=None, obj_optimizer=adagrad_window, - more_obj_params=None, more_replacements=None, - total_grad_norm_constraint=None): + updates.update(test_optimizer(grads, self.test_params + more_tf_params)) + + def add_obj_updates( + self, + updates, + obj_n_mc=None, + obj_optimizer=adagrad_window, + more_obj_params=None, + more_replacements=None, + total_grad_norm_constraint=None, + ): if more_obj_params is None: more_obj_params = [] if more_replacements is None: more_replacements = dict() - obj_target = self(obj_n_mc, more_obj_params=more_obj_params, more_replacements=more_replacements) - grads = pm.updates.get_or_compute_grads(obj_target, self.obj_params + more_obj_params) + obj_target = self( + obj_n_mc, + more_obj_params=more_obj_params, + more_replacements=more_replacements, + ) + grads = pm.updates.get_or_compute_grads( + obj_target, self.obj_params + more_obj_params + ) if total_grad_norm_constraint is not None: grads = pm.total_norm_constraint(grads, total_grad_norm_constraint) - updates.update( - obj_optimizer( - grads, - self.obj_params + - more_obj_params)) + updates.update(obj_optimizer(grads, self.obj_params + more_obj_params)) if self.op.returns_loss: updates.loss = obj_target - @change_flags(compute_test_value='off') - def step_function(self, obj_n_mc=None, tf_n_mc=None, - obj_optimizer=adagrad_window, test_optimizer=adagrad_window, - more_obj_params=None, more_tf_params=None, - more_updates=None, more_replacements=None, - total_grad_norm_constraint=None, - score=False, fn_kwargs=None): - R"""Step function that should be called on each optimization step. + @change_flags(compute_test_value="off") + def step_function( + self, + obj_n_mc=None, + tf_n_mc=None, + obj_optimizer=adagrad_window, + test_optimizer=adagrad_window, + more_obj_params=None, + more_tf_params=None, + more_updates=None, + more_replacements=None, + total_grad_norm_constraint=None, + score=False, + fn_kwargs=None, + ): + r"""Step function that should be called on each optimization step. Generally it solves the following problem: @@ -305,25 +337,29 @@ def step_function(self, obj_n_mc=None, tf_n_mc=None, if fn_kwargs is None: fn_kwargs = {} if score and not self.op.returns_loss: - raise NotImplementedError('%s does not have loss' % self.op) - updates = self.updates(obj_n_mc=obj_n_mc, tf_n_mc=tf_n_mc, - obj_optimizer=obj_optimizer, - test_optimizer=test_optimizer, - more_obj_params=more_obj_params, - more_tf_params=more_tf_params, - more_updates=more_updates, - more_replacements=more_replacements, - total_grad_norm_constraint=total_grad_norm_constraint) + raise NotImplementedError("%s does not have loss" % self.op) + updates = self.updates( + obj_n_mc=obj_n_mc, + tf_n_mc=tf_n_mc, + obj_optimizer=obj_optimizer, + test_optimizer=test_optimizer, + more_obj_params=more_obj_params, + more_tf_params=more_tf_params, + more_updates=more_updates, + more_replacements=more_replacements, + total_grad_norm_constraint=total_grad_norm_constraint, + ) if score: - step_fn = theano.function( - [], updates.loss, updates=updates, **fn_kwargs) + step_fn = theano.function([], updates.loss, updates=updates, **fn_kwargs) else: step_fn = theano.function([], None, updates=updates, **fn_kwargs) return step_fn - @change_flags(compute_test_value='off') - def score_function(self, sc_n_mc=None, more_replacements=None, fn_kwargs=None): # pragma: no cover - R"""Compile scoring function that operates which takes no inputs and returns Loss + @change_flags(compute_test_value="off") + def score_function( + self, sc_n_mc=None, more_replacements=None, fn_kwargs=None + ): # pragma: no cover + r"""Compile scoring function that operates which takes no inputs and returns Loss Parameters ---------- @@ -341,25 +377,27 @@ def score_function(self, sc_n_mc=None, more_replacements=None, fn_kwargs=None): if fn_kwargs is None: fn_kwargs = {} if not self.op.returns_loss: - raise NotImplementedError('%s does not have loss' % self.op) + raise NotImplementedError("%s does not have loss" % self.op) if more_replacements is None: more_replacements = {} loss = self(sc_n_mc, more_replacements=more_replacements) return theano.function([], loss, **fn_kwargs) - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __call__(self, nmc, **kwargs): - if 'more_tf_params' in kwargs: - m = -1. + if "more_tf_params" in kwargs: + m = -1.0 else: - m = 1. + m = 1.0 a = self.op.apply(self.tf) - a = self.approx.set_size_and_deterministic(a, nmc, 0, kwargs.get('more_replacements')) + a = self.approx.set_size_and_deterministic( + a, nmc, 0, kwargs.get("more_replacements") + ) return m * self.op.T(a) class Operator(object): - R"""**Base class for Operator** + r"""**Base class for Operator** Parameters ---------- @@ -381,11 +419,14 @@ class Operator(object): def __init__(self, approx): self.approx = approx if not self.supports_aevb and approx.has_local: - raise AEVBInferenceError('%s does not support AEVB, ' - 'please change inference method' % self) + raise AEVBInferenceError( + "%s does not support AEVB, " "please change inference method" % self + ) if self.require_logq and not approx.has_logq: - raise ExplicitInferenceError('%s requires logq, but %s does not implement it' - 'please change inference method' % (self, approx)) + raise ExplicitInferenceError( + "%s requires logq, but %s does not implement it" + "please change inference method" % (self, approx) + ) inputs = property(lambda self: self.approx.inputs) logp = property(lambda self: self.approx.logp) @@ -398,8 +439,8 @@ def __init__(self, approx): logq_norm = property(lambda self: self.approx.logq_norm) model = property(lambda self: self.approx.model) - def apply(self, f): # pragma: no cover - R"""Operator itself + def apply(self, f): # pragma: no cover + r"""Operator itself .. math:: @@ -421,24 +462,25 @@ def apply(self, f): # pragma: no cover def __call__(self, f=None): if self.has_test_function: if f is None: - raise ParametrizationError('Operator %s requires TestFunction' % self) + raise ParametrizationError("Operator %s requires TestFunction" % self) else: if not isinstance(f, TestFunction): f = TestFunction.from_function(f) else: if f is not None: warnings.warn( - 'TestFunction for %s is redundant and removed' % - self, stacklevel=3) + "TestFunction for %s is redundant and removed" % self, stacklevel=3 + ) else: pass f = TestFunction() f.setup(self.approx) return self.objective_class(self, f) - def __str__(self): # pragma: no cover - return '%(op)s[%(ap)s]' % dict(op=self.__class__.__name__, - ap=self.approx.__class__.__name__) + def __str__(self): # pragma: no cover + return "%(op)s[%(ap)s]" % dict( + op=self.__class__.__name__, ap=self.approx.__class__.__name__ + ) def collect_shared_to_list(params): @@ -455,14 +497,14 @@ def collect_shared_to_list(params): """ if isinstance(params, dict): return list( - t[1] for t in sorted(params.items(), key=lambda t: t[0]) + t[1] + for t in sorted(params.items(), key=lambda t: t[0]) if isinstance(t[1], theano.compile.SharedVariable) ) elif params is None: return [] else: - raise TypeError( - 'Unknown type %s for %r, need dict or None') + raise TypeError("Unknown type %s for %r, need dict or None") class TestFunction(object): @@ -483,14 +525,14 @@ def setup(self, approx): @classmethod def from_function(cls, f): if not callable(f): - raise ParametrizationError('Need callable, got %r' % f) + raise ParametrizationError("Need callable, got %r" % f) obj = TestFunction() obj.__call__ = f return obj class Group(WithMemoization): - R"""**Base class for grouping variables in VI** + r"""**Base class for grouping variables in VI** Grouped Approximation is used for modelling mutual dependencies for a specified group of variables. Base for local and global group. @@ -713,24 +755,26 @@ class Group(WithMemoization): has_logq = True # some important defaults - initial_dist_name = 'normal' - initial_dist_map = 0. + initial_dist_name = "normal" + initial_dist_map = 0.0 # for handy access using class methods __param_spec__ = dict() - short_name = '' + short_name = "" alias_names = frozenset() __param_registry = dict() __name_registry = dict() @classmethod def register(cls, sbcls): - assert frozenset(sbcls.__param_spec__) not in cls.__param_registry, 'Duplicate __param_spec__' + assert ( + frozenset(sbcls.__param_spec__) not in cls.__param_registry + ), "Duplicate __param_spec__" cls.__param_registry[frozenset(sbcls.__param_spec__)] = sbcls - assert sbcls.short_name not in cls.__name_registry, 'Duplicate short_name' + assert sbcls.short_name not in cls.__name_registry, "Duplicate short_name" cls.__name_registry[sbcls.short_name] = sbcls for alias in sbcls.alias_names: - assert alias not in cls.__name_registry, 'Duplicate alias_name' + assert alias not in cls.__name_registry, "Duplicate alias_name" cls.__name_registry[alias] = sbcls return sbcls @@ -739,9 +783,12 @@ def group_for_params(cls, params): if pm.variational.flows.seems_like_flow_params(params): return pm.variational.approximations.NormalizingFlowGroup if frozenset(params) not in cls.__param_registry: - raise KeyError('No such group for the following params: {!r}, ' - 'only the following are supported\n\n{}' - .format(params, cls.__param_registry)) + raise KeyError( + "No such group for the following params: {!r}, " + "only the following are supported\n\n{}".format( + params, cls.__param_registry + ) + ) return cls.__param_registry[frozenset(params)] @classmethod @@ -749,37 +796,47 @@ def group_for_short_name(cls, name): if pm.variational.flows.seems_like_formula(name): return pm.variational.approximations.NormalizingFlowGroup if name.lower() not in cls.__name_registry: - raise KeyError('No such group: {!r}, ' - 'only the following are supported\n\n{}' - .format(name, cls.__name_registry)) + raise KeyError( + "No such group: {!r}, " + "only the following are supported\n\n{}".format( + name, cls.__name_registry + ) + ) return cls.__name_registry[name.lower()] def __new__(cls, group=None, vfam=None, params=None, *args, **kwargs): if cls is Group: if vfam is not None and params is not None: - raise TypeError('Cannot call Group with both `vfam` and `params` provided') + raise TypeError( + "Cannot call Group with both `vfam` and `params` provided" + ) elif vfam is not None: return super(Group, cls).__new__(cls.group_for_short_name(vfam)) elif params is not None: return super(Group, cls).__new__(cls.group_for_params(params)) else: - raise TypeError('Need to call Group with either `vfam` or `params` provided') + raise TypeError( + "Need to call Group with either `vfam` or `params` provided" + ) else: return super(Group, cls).__new__(cls) - def __init__(self, group, - vfam=None, - params=None, - random_seed=None, - model=None, - local=False, - rowwise=False, - options=None, - **kwargs): + def __init__( + self, + group, + vfam=None, + params=None, + random_seed=None, + model=None, + local=False, + rowwise=False, + options=None, + **kwargs + ): if local and not self.supports_batched: - raise LocalGroupError('%s does not support local groups' % self.__class__) + raise LocalGroupError("%s does not support local groups" % self.__class__) if local and rowwise: - raise LocalGroupError('%s does not support local grouping in rowwise mode') + raise LocalGroupError("%s does not support local grouping in rowwise mode") if isinstance(vfam, str): vfam = vfam.lower() if options is None: @@ -808,7 +865,7 @@ def get_param_spec_for(cls, **kwargs): return res def _check_user_params(self, **kwargs): - R"""*Dev* - checks user params, allocates them if they are correct, returns True. + r"""*Dev* - checks user params, allocates them if they are correct, returns True. If they are not present, returns False Parameters @@ -823,27 +880,29 @@ def _check_user_params(self, **kwargs): if user_params is None: return False if not isinstance(user_params, dict): - raise TypeError('params should be a dict') + raise TypeError("params should be a dict") givens = set(user_params.keys()) needed = set(self.__param_spec__) if givens != needed: raise ParametrizationError( - 'Passed parameters do not have a needed set of keys, ' - 'they should be equal, got {givens}, needed {needed}'.format( - givens=givens, needed=needed)) + "Passed parameters do not have a needed set of keys, " + "they should be equal, got {givens}, needed {needed}".format( + givens=givens, needed=needed + ) + ) self._user_params = dict() - spec = self.get_param_spec_for(d=self.ddim, **kwargs.pop('spec_kw', {})) + spec = self.get_param_spec_for(d=self.ddim, **kwargs.pop("spec_kw", {})) for name, param in self.user_params.items(): shape = spec[name] if self.local: - shape = (-1, ) + shape + shape = (-1,) + shape elif self.batched: - shape = (self.bdim, ) + shape + shape = (self.bdim,) + shape self._user_params[name] = tt.as_tensor(param).reshape(shape) return True def _initial_type(self, name): - R"""*Dev* - initial type with given name. The correct type depends on `self.batched` + r"""*Dev* - initial type with given name. The correct type depends on `self.batched` Parameters ---------- @@ -859,7 +918,7 @@ def _initial_type(self, name): return tt.matrix(name) def _input_type(self, name): - R"""*Dev* - input type with given name. The correct type depends on `self.batched` + r"""*Dev* - input type with given name. The correct type depends on `self.batched` Parameters ---------- @@ -874,24 +933,26 @@ def _input_type(self, name): else: return tt.vector(name) - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def __init_group__(self, group): if not group: - raise GroupError('Got empty group') + raise GroupError("Got empty group") if self.group is None: # delayed init self.group = group if self.batched and len(group) > 1: if self.local: # better error message - raise LocalGroupError('Local groups with more than 1 variable are not supported') + raise LocalGroupError( + "Local groups with more than 1 variable are not supported" + ) else: - raise BatchedGroupError('Batched groups with more than 1 variable are not supported') + raise BatchedGroupError( + "Batched groups with more than 1 variable are not supported" + ) self.symbolic_initial = self._initial_type( - self.__class__.__name__ + '_symbolic_initial_tensor' - ) - self.input = self._input_type( - self.__class__.__name__ + '_symbolic_input' + self.__class__.__name__ + "_symbolic_initial_tensor" ) + self.input = self._input_type(self.__class__.__name__ + "_symbolic_input") # I do some staff that is not supported by standard __init__ # so I have to to it by myself self.ordering = ArrayOrdering([]) @@ -899,18 +960,19 @@ def __init_group__(self, group): self.group = [get_transformed(var) for var in self.group] for var in self.group: if isinstance(var.distribution, pm.Discrete): - raise ParametrizationError('Discrete variables are not supported by VI: {}' - .format(var)) + raise ParametrizationError( + "Discrete variables are not supported by VI: {}".format(var) + ) begin = self.ddim if self.batched: if var.ndim < 1: if self.local: - raise LocalGroupError('Local variable should not be scalar') + raise LocalGroupError("Local variable should not be scalar") else: - raise BatchedGroupError('Batched variable should not be scalar') + raise BatchedGroupError("Batched variable should not be scalar") self.ordering.size += (np.prod(var.dshape[1:])).astype(int) if self.local: - shape = (-1, ) + var.dshape[1:] + shape = (-1,) + var.dshape[1:] else: shape = var.dshape else: @@ -921,7 +983,7 @@ def __init_group__(self, group): self.ordering.vmap.append(vmap) self.ordering.by_name[vmap.var] = vmap vr = self.input[..., vmap.slc].reshape(shape).astype(vmap.dtyp) - vr.name = vmap.var + '_vi_replacement' + vr.name = vmap.var + "_vi_replacement" self.replacements[var] = vr self.bij = DictToArrayBijection(self.ordering, {}) @@ -1022,7 +1084,7 @@ def _new_initial(self, size, deterministic, more_replacements=None): dim, dist_name, dist_map = ( self.ddim, self.initial_dist_name, - self.initial_dist_map + self.initial_dist_map, ) dtype = self.symbolic_initial.dtype dim = tt.as_tensor(dim) @@ -1036,11 +1098,7 @@ def _new_initial(self, size, deterministic, more_replacements=None): return getattr(self._rng, dist_name)(shape) else: sample = getattr(self._rng, dist_name)(shape) - initial = tt.switch( - deterministic, - tt.ones(shape, dtype) * dist_map, - sample - ) + initial = tt.switch(deterministic, tt.ones(shape, dtype) * dist_map, sample) return initial @node_property @@ -1065,7 +1123,7 @@ def symbolic_random2d(self): else: return self.symbolic_random - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def set_size_and_deterministic(self, node, s, d, more_replacements=None): """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node @@ -1085,7 +1143,9 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): ------- :class:`Variable` with applied replacements, ready to use """ - flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) + flat2rand = self.make_size_and_deterministic_replacements( + s, d, more_replacements + ) node_out = theano.clone(node, flat2rand) try_to_set_test_value(node, node_out, s) return node_out @@ -1106,8 +1166,7 @@ def symbolic_sample_over_posterior(self, node): def sample(post): return theano.clone(node, {self.input: post}) - nodes, _ = theano.scan( - sample, random) + nodes, _ = theano.scan(sample, random) return nodes def symbolic_single_sample(self, node): @@ -1118,9 +1177,7 @@ def symbolic_single_sample(self, node): node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) random = tt.patternbroadcast(random, self.symbolic_initial.broadcastable) - return theano.clone( - node, {self.input: random[0]} - ) + return theano.clone(node, {self.input: random[0]}) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): """*Dev* - creates correct replacements for initial depending on @@ -1148,8 +1205,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) @node_property def symbolic_normalizing_constant(self): """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`""" - t = self.to_flat_input( - tt.max([v.scaling for v in self.group])) + t = self.to_flat_input(tt.max([v.scaling for v in self.group])) t = self.symbolic_single_sample(t) return pm.floatX(t) @@ -1185,14 +1241,14 @@ def logq_norm(self): def __str__(self): if self.group is None: - shp = 'undefined' + shp = "undefined" else: shp = str(self.ddim) if self.local: - shp = 'None, ' + shp + shp = "None, " + shp elif self.batched: - shp = str(self.bdim) + ', ' + shp - return '{cls}[{shp}]'.format(shp=shp, cls=self.__class__.__name__) + shp = str(self.bdim) + ", " + shp + return "{cls}[{shp}]".format(shp=shp, cls=self.__class__.__name__) @node_property def std(self): @@ -1245,25 +1301,26 @@ def __init__(self, groups, model=None): self._scale_cost_to_minibatch = theano.shared(np.int8(1)) model = modelcontext(model) if not model.free_RVs: - raise TypeError('Model does not have FreeRVs') + raise TypeError("Model does not have FreeRVs") self.groups = list() seen = set() rest = None for g in groups: if g.group is None: if rest is not None: - raise GroupError('More than one group is specified for ' - 'the rest variables') + raise GroupError( + "More than one group is specified for " "the rest variables" + ) else: rest = g else: if set(g.group) & seen: - raise GroupError('Found duplicates in groups') + raise GroupError("Found duplicates in groups") seen.update(g.group) self.groups.append(g) if set(model.free_RVs) - seen: if rest is None: - raise GroupError('No approximation is specified for the rest variables') + raise GroupError("No approximation is specified for the rest variables") else: rest.__init_group__(list(set(model.free_RVs) - seen)) self.groups.append(rest) @@ -1271,22 +1328,24 @@ def __init__(self, groups, model=None): @property def has_logq(self): - return all(self.collect('has_logq')) + return all(self.collect("has_logq")) - def collect(self, item, part='total'): - if part == 'total': + def collect(self, item, part="total"): + if part == "total": return [getattr(g, item) for g in self.groups] - elif part == 'local': + elif part == "local": return [getattr(g, item) for g in self.groups if g.local] - elif part == 'global': + elif part == "global": return [getattr(g, item) for g in self.groups if not g.local] - elif part == 'batched': + elif part == "batched": return [getattr(g, item) for g in self.groups if g.batched] else: - raise ValueError("unknown part %s, expected {'local', 'global', 'total', 'batched'}") + raise ValueError( + "unknown part %s, expected {'local', 'global', 'total', 'batched'}" + ) - inputs = property(lambda self: self.collect('input')) - symbolic_randoms = property(lambda self: self.collect('symbolic_random')) + inputs = property(lambda self: self.collect("input")) + symbolic_randoms = property(lambda self: self.collect("symbolic_random")) @property def scale_cost_to_minibatch(self): @@ -1303,22 +1362,21 @@ def symbolic_normalizing_constant(self): Here the effect is controlled by `self.scale_cost_to_minibatch` """ t = tt.max( - self.collect('symbolic_normalizing_constant') + [ - var.scaling for var in self.model.observed_RVs - ]) - t = tt.switch(self._scale_cost_to_minibatch, t, - tt.constant(1, dtype=t.dtype)) + self.collect("symbolic_normalizing_constant") + + [var.scaling for var in self.model.observed_RVs] + ) + t = tt.switch(self._scale_cost_to_minibatch, t, tt.constant(1, dtype=t.dtype)) return pm.floatX(t) @node_property def symbolic_logq(self): """*Dev* - collects `symbolic_logq` for all groups""" - return tt.add(*self.collect('symbolic_logq')) + return tt.add(*self.collect("symbolic_logq")) @node_property def logq(self): """*Dev* - collects `logQ` for all groups""" - return tt.add(*self.collect('logq')) + return tt.add(*self.collect("logq")) @node_property def logq_norm(self): @@ -1329,7 +1387,8 @@ def logq_norm(self): def _sized_symbolic_varlogp_and_datalogp(self): """*Dev* - computes sampled prior term from model via `theano.scan`""" varlogp_s, datalogp_s = self.symbolic_sample_over_posterior( - [self.model.varlogpt, self.model.datalogpt]) + [self.model.varlogpt, self.model.datalogpt] + ) return varlogp_s, datalogp_s # both shape (s,) @node_property @@ -1366,7 +1425,8 @@ def datalogp(self): def _single_symbolic_varlogp_and_datalogp(self): """*Dev* - computes sampled prior term from model via `theano.scan`""" varlogp, datalogp = self.symbolic_single_sample( - [self.model.varlogpt, self.model.datalogpt]) + [self.model.varlogpt, self.model.datalogpt] + ) return varlogp, datalogp @node_property @@ -1405,9 +1465,9 @@ def datalogp_norm(self): @property def replacements(self): """*Dev* - all replacements from groups to replace PyMC random variables with approximation""" - return collections.OrderedDict(itertools.chain.from_iterable( - g.replacements.items() for g in self.groups - )) + return collections.OrderedDict( + itertools.chain.from_iterable(g.replacements.items() for g in self.groups) + ) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): """*Dev* - creates correct replacements for initial depending on @@ -1430,11 +1490,13 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) more_replacements = {} flat2rand = collections.OrderedDict() for g in self.groups: - flat2rand.update(g.make_size_and_deterministic_replacements(s, d, more_replacements)) + flat2rand.update( + g.make_size_and_deterministic_replacements(s, d, more_replacements) + ) flat2rand.update(more_replacements) return flat2rand - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def set_size_and_deterministic(self, node, s, d, more_replacements=None): """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node @@ -1456,7 +1518,9 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): """ _node = node optimizations = self.get_optimization_replacements(s, d) - flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) + flat2rand = self.make_size_and_deterministic_replacements( + s, d, more_replacements + ) node = theano.clone(node, optimizations) node = theano.clone(node, flat2rand) try_to_set_test_value(_node, node, s) @@ -1476,8 +1540,7 @@ def symbolic_sample_over_posterior(self, node): def sample(*post): return theano.clone(node, dict(zip(self.inputs, post))) - nodes, _ = theano.scan( - sample, self.symbolic_randoms) + nodes, _ = theano.scan(sample, self.symbolic_randoms) return nodes def symbolic_single_sample(self, node): @@ -1488,9 +1551,7 @@ def symbolic_single_sample(self, node): node = self.to_flat_input(node) post = [v[0] for v in self.symbolic_randoms] inp = self.inputs - return theano.clone( - node, dict(zip(inp, post)) - ) + return theano.clone(node, dict(zip(inp, post))) def get_optimization_replacements(self, s, d): """*Dev* - optimizations for logP. If sample size is static and equal to 1: @@ -1503,10 +1564,8 @@ def get_optimization_replacements(self, s, d): repl[self.datalogp] = self.single_symbolic_datalogp return repl - @change_flags(compute_test_value='off') - def sample_node(self, node, size=None, - deterministic=False, - more_replacements=None): + @change_flags(compute_test_value="off") + def sample_node(self, node, size=None, deterministic=False, more_replacements=None): """Samples given node or nodes over shared posterior Parameters @@ -1530,7 +1589,9 @@ def sample_node(self, node, size=None, node_out = self.symbolic_single_sample(node) else: node_out = self.symbolic_sample_over_posterior(node) - node_out = self.set_size_and_deterministic(node_out, size, deterministic, more_replacements) + node_out = self.set_size_and_deterministic( + node_out, size, deterministic, more_replacements + ) try_to_set_test_value(node_in, node_out, size) return node_out @@ -1538,24 +1599,27 @@ def rslice(self, name): """*Dev* - vectorized sampling for named random variable without call to `theano.scan`. This node still needs :func:`set_size_and_deterministic` to be evaluated """ + def vars_names(vs): return {v.name for v in vs} + for vars_, random, ordering in zip( - self.collect('group'), - self.symbolic_randoms, - self.collect('ordering')): + self.collect("group"), self.symbolic_randoms, self.collect("ordering") + ): if name in vars_names(vars_): name_, slc, shape, dtype = ordering[name] - found = random[..., slc].reshape((random.shape[0], ) + shape).astype(dtype) - found.name = name + '_vi_random_slice' + found = ( + random[..., slc].reshape((random.shape[0],) + shape).astype(dtype) + ) + found.name = name + "_vi_random_slice" break else: - raise KeyError('%r not found' % name) + raise KeyError("%r not found" % name) return found @property @memoize(bound=True) - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def sample_dict_fn(self): s = tt.iscalar() names = [v.name for v in self.model.free_RVs] @@ -1565,7 +1629,9 @@ def sample_dict_fn(self): def inner(draws=100): _samples = sample_fn(draws) - return dict([(v_.name, s_) for v_, s_ in zip(self.model.free_RVs, _samples)]) + return dict( + [(v_.name, s_) for v_, s_ in zip(self.model.free_RVs, _samples)] + ) return inner @@ -1584,13 +1650,19 @@ def sample(self, draws=500, include_transformed=True): trace : :class:`pymc3.backends.base.MultiTrace` Samples drawn from variational posterior. """ - vars_sampled = get_default_varnames(self.model.unobserved_RVs, - include_transformed=include_transformed) + vars_sampled = get_default_varnames( + self.model.unobserved_RVs, include_transformed=include_transformed + ) samples = self.sample_dict_fn(draws) # type: dict - points = ({name: records[i] for name, records in samples.items()} for i in range(draws)) - trace = pm.sampling.NDArray(model=self.model, vars=vars_sampled, test_point={ - name: records[0] for name, records in samples.items() - }) + points = ( + {name: records[i] for name, records in samples.items()} + for i in range(draws) + ) + trace = pm.sampling.NDArray( + model=self.model, + vars=vars_sampled, + test_point={name: records[0] for name, records in samples.items()}, + ) try: trace.setup(draws=draws, chain=0) for point in points: @@ -1601,34 +1673,34 @@ def sample(self, draws=500, include_transformed=True): @property def ndim(self): - return sum(self.collect('ndim')) + return sum(self.collect("ndim")) @property def ddim(self): - return sum(self.collect('ddim')) + return sum(self.collect("ddim")) @property def has_local(self): - return any(self.collect('local')) + return any(self.collect("local")) @property def has_global(self): - return any(not c for c in self.collect('local')) + return any(not c for c in self.collect("local")) @property def has_batched(self): - return any(not c for c in self.collect('batched')) + return any(not c for c in self.collect("batched")) @node_property def symbolic_random(self): - return tt.concatenate(self.collect('symbolic_random2d'), axis=-1) + return tt.concatenate(self.collect("symbolic_random2d"), axis=-1) def __str__(self): if len(self.groups) < 5: - return 'Approximation{' + ' & '.join(map(str, self.groups)) + '}' + return "Approximation{" + " & ".join(map(str, self.groups)) + "}" else: - forprint = self.groups[:2] + ['...'] + self.groups[-2:] - return 'Approximation{' + ' & '.join(map(str, forprint)) + '}' + forprint = self.groups[:2] + ["..."] + self.groups[-2:] + return "Approximation{" + " & ".join(map(str, forprint)) + "}" @property def all_histograms(self): @@ -1641,9 +1713,11 @@ def any_histograms(self): @node_property def joint_histogram(self): if not self.all_histograms: - raise VariationalInferenceError('%s does not consist of all Empirical approximations') - return tt.concatenate(self.collect('histogram'), axis=-1) + raise VariationalInferenceError( + "%s does not consist of all Empirical approximations" + ) + return tt.concatenate(self.collect("histogram"), axis=-1) @property def params(self): - return sum(self.collect('params'), []) + return sum(self.collect("params"), []) diff --git a/pymc3/variational/stein.py b/pymc3/variational/stein.py index cc3b0fccb2..543ab71843 100644 --- a/pymc3/variational/stein.py +++ b/pymc3/variational/stein.py @@ -4,9 +4,7 @@ from pymc3.theanof import floatX, change_flags from pymc3.memoize import WithMemoization, memoize -__all__ = [ - 'Stein' -] +__all__ = ["Stein"] class Stein(WithMemoization): @@ -26,27 +24,24 @@ def input_joint_matrix(self): @node_property def approx_symbolic_matrices(self): if self.use_histogram: - return self.approx.collect('histogram') + return self.approx.collect("histogram") else: return self.approx.symbolic_randoms @node_property def dlogp(self): - grad = tt.grad( - self.logp_norm.sum(), - self.approx_symbolic_matrices - ) + grad = tt.grad(self.logp_norm.sum(), self.approx_symbolic_matrices) def flatten2(tensor): return tensor.flatten(2) + return tt.concatenate(list(map(flatten2, grad)), -1) @node_property def grad(self): n = floatX(self.input_joint_matrix.shape[0]) temperature = self.temperature - svgd_grad = (self.density_part_grad / temperature + - self.repulsive_part_grad) + svgd_grad = self.density_part_grad / temperature + self.repulsive_part_grad return svgd_grad / n @node_property @@ -75,11 +70,13 @@ def logp_norm(self): if self.use_histogram: sized_symbolic_logp = theano.clone( sized_symbolic_logp, - dict(zip(self.approx.symbolic_randoms, self.approx.collect('histogram'))) + dict( + zip(self.approx.symbolic_randoms, self.approx.collect("histogram")) + ), ) return sized_symbolic_logp / self.approx.symbolic_normalizing_constant @memoize - @change_flags(compute_test_value='off') + @change_flags(compute_test_value="off") def _kernel(self): return self._kernel_f(self.input_joint_matrix) diff --git a/pymc3/variational/test_functions.py b/pymc3/variational/test_functions.py index 6f6b919ff0..fa99bac71a 100644 --- a/pymc3/variational/test_functions.py +++ b/pymc3/variational/test_functions.py @@ -2,9 +2,7 @@ from .opvi import TestFunction from pymc3.theanof import floatX -__all__ = [ - 'rbf' -] +__all__ = ["rbf"] class Kernel(TestFunction): @@ -21,20 +19,22 @@ class Kernel(TestFunction): class RBF(Kernel): def __call__(self, X): XY = X.dot(X.T) - x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, 'x') + x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, "x") X2e = tt.repeat(x2, X.shape[0], axis=1) - H = X2e + X2e.T - 2. * XY + H = X2e + X2e.T - 2.0 * XY V = tt.sort(H.flatten()) length = V.shape[0] # median distance - m = tt.switch(tt.eq((length % 2), 0), - # if even vector - tt.mean(V[((length // 2) - 1):((length // 2) + 1)]), - # if odd vector - V[length // 2]) - - h = .5 * m / tt.log(floatX(H.shape[0]) + floatX(1)) + m = tt.switch( + tt.eq((length % 2), 0), + # if even vector + tt.mean(V[((length // 2) - 1) : ((length // 2) + 1)]), + # if odd vector + V[length // 2], + ) + + h = 0.5 * m / tt.log(floatX(H.shape[0]) + floatX(1)) # RBF Kxy = tt.exp(-H / h / 2.0) diff --git a/pymc3/variational/updates.py b/pymc3/variational/updates.py index d9b4319f5c..ae9cbf1fa8 100755 --- a/pymc3/variational/updates.py +++ b/pymc3/variational/updates.py @@ -148,13 +148,17 @@ def get_or_compute_grads(loss_or_grads, params): compute its gradient, we can never update it and want to fail early). """ if any(not isinstance(p, theano.compile.SharedVariable) for p in params): - raise ValueError("params must contain shared variables only. If it " - "contains arbitrary parameter expressions, then " - "lasagne.utils.collect_shared_vars() may help you.") + raise ValueError( + "params must contain shared variables only. If it " + "contains arbitrary parameter expressions, then " + "lasagne.utils.collect_shared_vars() may help you." + ) if isinstance(loss_or_grads, list): if not len(loss_or_grads) == len(params): - raise ValueError("Got %d gradient expressions for %d parameters" % - (len(loss_or_grads), len(params))) + raise ValueError( + "Got %d gradient expressions for %d parameters" + % (len(loss_or_grads), len(params)) + ) return loss_or_grads else: return theano.grad(loss_or_grads, params) @@ -162,8 +166,8 @@ def get_or_compute_grads(loss_or_grads, params): def _get_call_kwargs(_locals_): _locals_ = _locals_.copy() - _locals_.pop('loss_or_grads') - _locals_.pop('params') + _locals_.pop("loss_or_grads") + _locals_.pop("params") return _locals_ @@ -211,7 +215,8 @@ def sgd(loss_or_grads=None, params=None, learning_rate=1e-3): return partial(sgd, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: raise ValueError( - 'Please provide both `loss_or_grads` and `params` to get updates') + "Please provide both `loss_or_grads` and `params` to get updates" + ) grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() @@ -260,8 +265,9 @@ def apply_momentum(updates, params=None, momentum=0.9): for param in params: value = param.get_value(borrow=True) - velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) + velocity = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) x = momentum * velocity + updates[param] updates[velocity] = x - param updates[param] = x @@ -269,8 +275,7 @@ def apply_momentum(updates, params=None, momentum=0.9): return updates -def momentum(loss_or_grads=None, params=None, - learning_rate=1e-3, momentum=0.9): +def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): """Stochastic Gradient Descent (SGD) updates with momentum Generates update expressions of the form: @@ -326,7 +331,8 @@ def momentum(loss_or_grads=None, params=None, return partial(pm.updates.momentum, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: raise ValueError( - 'Please provide both `loss_or_grads` and `params` to get updates') + "Please provide both `loss_or_grads` and `params` to get updates" + ) updates = sgd(loss_or_grads, params, learning_rate) return apply_momentum(updates, momentum=momentum) @@ -376,8 +382,9 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9): for param in params: value = param.get_value(borrow=True) - velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) + velocity = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) x = momentum * velocity + updates[param] - param updates[velocity] = x updates[param] = momentum * x + updates[param] @@ -385,8 +392,9 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9): return updates -def nesterov_momentum(loss_or_grads=None, params=None, - learning_rate=1e-3, momentum=0.9): +def nesterov_momentum( + loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9 +): """Stochastic Gradient Descent (SGD) updates with Nesterov momentum Generates update expressions of the form: @@ -447,7 +455,8 @@ def nesterov_momentum(loss_or_grads=None, params=None, return partial(nesterov_momentum, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: raise ValueError( - 'Please provide both `loss_or_grads` and `params` to get updates') + "Please provide both `loss_or_grads` and `params` to get updates" + ) updates = sgd(loss_or_grads, params, learning_rate) return apply_nesterov_momentum(updates, momentum=momentum) @@ -516,24 +525,26 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): return partial(adagrad, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: raise ValueError( - 'Please provide both `loss_or_grads` and `params` to get updates') + "Please provide both `loss_or_grads` and `params` to get updates" + ) grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() for param, grad in zip(params, grads): value = param.get_value(borrow=True) - accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) + accu = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) accu_new = accu + grad ** 2 updates[accu] = accu_new - updates[param] = param - (learning_rate * grad / - tt.sqrt(accu_new + epsilon)) + updates[param] = param - (learning_rate * grad / tt.sqrt(accu_new + epsilon)) return updates -def adagrad_window(loss_or_grads=None, params=None, - learning_rate=0.001, epsilon=.1, n_win=10): +def adagrad_window( + loss_or_grads=None, params=None, learning_rate=0.001, epsilon=0.1, n_win=10 +): """Returns a function that returns parameter updates. Instead of accumulated estimate, uses running window @@ -558,15 +569,16 @@ def adagrad_window(loss_or_grads=None, params=None, if loss_or_grads is None and params is None: return partial(adagrad_window, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: - raise ValueError('Please provide both `loss_or_grads` and `params` to get updates') + raise ValueError( + "Please provide both `loss_or_grads` and `params` to get updates" + ) grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() for param, grad in zip(params, grads): i = theano.shared(pm.floatX(0)) - i_int = i.astype('int32') + i_int = i.astype("int32") value = param.get_value(borrow=True) - accu = theano.shared( - np.zeros(value.shape + (n_win,), dtype=value.dtype)) + accu = theano.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype)) # Append squared gradient vector to accu_new accu_new = tt.set_subtensor(accu[..., i_int], grad ** 2) @@ -575,13 +587,11 @@ def adagrad_window(loss_or_grads=None, params=None, updates[i] = i_new accu_sum = accu_new.sum(axis=-1) - updates[param] = param - (learning_rate * grad / - tt.sqrt(accu_sum + epsilon)) + updates[param] = param - (learning_rate * grad / tt.sqrt(accu_sum + epsilon)) return updates -def rmsprop(loss_or_grads=None, params=None, - learning_rate=1.0, rho=0.9, epsilon=1e-6): +def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon=1e-6): """RMSProp updates Scale learning rates by dividing with the moving average of the root mean @@ -646,7 +656,8 @@ def rmsprop(loss_or_grads=None, params=None, return partial(rmsprop, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: raise ValueError( - 'Please provide both `loss_or_grads` and `params` to get updates') + "Please provide both `loss_or_grads` and `params` to get updates" + ) grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() @@ -655,18 +666,19 @@ def rmsprop(loss_or_grads=None, params=None, for param, grad in zip(params, grads): value = param.get_value(borrow=True) - accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) + accu = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) accu_new = rho * accu + (one - rho) * grad ** 2 updates[accu] = accu_new - updates[param] = param - (learning_rate * grad / - tt.sqrt(accu_new + epsilon)) + updates[param] = param - (learning_rate * grad / tt.sqrt(accu_new + epsilon)) return updates -def adadelta(loss_or_grads=None, params=None, - learning_rate=1.0, rho=0.95, epsilon=1e-6): +def adadelta( + loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsilon=1e-6 +): """ Adadelta updates Scale learning rates by the ratio of accumulated gradients to accumulated @@ -740,7 +752,8 @@ def adadelta(loss_or_grads=None, params=None, return partial(adadelta, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: raise ValueError( - 'Please provide both `loss_or_grads` and `params` to get updates') + "Please provide both `loss_or_grads` and `params` to get updates" + ) grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() @@ -750,19 +763,20 @@ def adadelta(loss_or_grads=None, params=None, for param, grad in zip(params, grads): value = param.get_value(borrow=True) # accu: accumulate gradient magnitudes - accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) + accu = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) # delta_accu: accumulate update magnitudes (recursively!) - delta_accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) + delta_accu = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) # update accu (as in rmsprop) accu_new = rho * accu + (one - rho) * grad ** 2 updates[accu] = accu_new # compute parameter update, using the 'old' delta_accu - update = (grad * tt.sqrt(delta_accu + epsilon) / - tt.sqrt(accu_new + epsilon)) + update = grad * tt.sqrt(delta_accu + epsilon) / tt.sqrt(accu_new + epsilon) updates[param] = param - learning_rate * update # update delta_accu (as accu, but accumulating updates) @@ -772,8 +786,14 @@ def adadelta(loss_or_grads=None, params=None, return updates -def adam(loss_or_grads=None, params=None, learning_rate=0.001, beta1=0.9, - beta2=0.999, epsilon=1e-8): +def adam( + loss_or_grads=None, + params=None, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, +): """Adam updates Adam updates implemented as in [1]_. @@ -831,26 +851,29 @@ def adam(loss_or_grads=None, params=None, learning_rate=0.001, beta1=0.9, return partial(adam, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: raise ValueError( - 'Please provide both `loss_or_grads` and `params` to get updates') + "Please provide both `loss_or_grads` and `params` to get updates" + ) all_grads = get_or_compute_grads(loss_or_grads, params) - t_prev = theano.shared(pm.theanof.floatX(0.)) + t_prev = theano.shared(pm.theanof.floatX(0.0)) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = tt.constant(1) t = t_prev + 1 - a_t = learning_rate * tt.sqrt(one - beta2**t) / (one - beta1**t) + a_t = learning_rate * tt.sqrt(one - beta2 ** t) / (one - beta1 ** t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) - m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) - v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) + m_prev = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) + v_prev = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) m_t = beta1 * m_prev + (one - beta1) * g_t - v_t = beta2 * v_prev + (one - beta2) * g_t**2 + v_t = beta2 * v_prev + (one - beta2) * g_t ** 2 step = a_t * m_t / (tt.sqrt(v_t) + epsilon) updates[m_prev] = m_t @@ -861,8 +884,14 @@ def adam(loss_or_grads=None, params=None, learning_rate=0.001, beta1=0.9, return updates -def adamax(loss_or_grads=None, params=None, learning_rate=0.002, beta1=0.9, - beta2=0.999, epsilon=1e-8): +def adamax( + loss_or_grads=None, + params=None, + learning_rate=0.002, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, +): """Adamax updates Adamax updates implemented as in [1]_. This is a variant of of the Adam @@ -917,23 +946,26 @@ def adamax(loss_or_grads=None, params=None, learning_rate=0.002, beta1=0.9, return partial(adamax, **_get_call_kwargs(locals())) elif loss_or_grads is None or params is None: raise ValueError( - 'Please provide both `loss_or_grads` and `params` to get updates') + "Please provide both `loss_or_grads` and `params` to get updates" + ) all_grads = get_or_compute_grads(loss_or_grads, params) - t_prev = theano.shared(pm.theanof.floatX(0.)) + t_prev = theano.shared(pm.theanof.floatX(0.0)) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = tt.constant(1) t = t_prev + 1 - a_t = learning_rate / (one - beta1**t) + a_t = learning_rate / (one - beta1 ** t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) - m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) - u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), - broadcastable=param.broadcastable) + m_prev = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) + u_prev = theano.shared( + np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable + ) m_t = beta1 * m_prev + (one - beta1) * g_t u_t = tt.maximum(beta2 * u_prev, abs(g_t)) @@ -1021,14 +1053,12 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): dtype = np.dtype(theano.config.floatX).type norms = tt.sqrt(tt.sum(tt.sqr(tensor_var), axis=sum_over, keepdims=True)) target_norms = tt.clip(norms, 0, dtype(max_norm)) - constrained_output = \ - (tensor_var * (target_norms / (dtype(epsilon) + norms))) + constrained_output = tensor_var * (target_norms / (dtype(epsilon) + norms)) return constrained_output -def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, - return_norm=False): +def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False): """Rescales a list of tensors based on their combined norm If the combined norm of the input tensors exceeds the threshold then all @@ -1083,7 +1113,7 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, learning with neural networks. In Advances in Neural Information Processing Systems (pp. 3104-3112). """ - norm = tt.sqrt(sum(tt.sum(tensor**2) for tensor in tensor_vars)) + norm = tt.sqrt(sum(tt.sum(tensor ** 2) for tensor in tensor_vars)) dtype = np.dtype(theano.config.floatX).type target_norm = tt.clip(norm, 0, dtype(max_norm)) multiplier = target_norm / (dtype(epsilon) + norm) diff --git a/pymc3/vartypes.py b/pymc3/vartypes.py index d0a4f20754..fc8d8e033d 100644 --- a/pymc3/vartypes.py +++ b/pymc3/vartypes.py @@ -2,23 +2,24 @@ import six -__all__ = ['bool_types', 'int_types', 'float_types', 'complex_types', 'continuous_types', - 'discrete_types', 'typefilter', 'isgenerator'] - -bool_types = set(['int8']) - -int_types = set(['int8', - 'int16', - 'int32', - 'int64', - 'uint8', - 'uint16', - 'uint32', - 'uint64']) -float_types = set(['float32', - 'float64']) -complex_types = set(['complex64', - 'complex128']) +__all__ = [ + "bool_types", + "int_types", + "float_types", + "complex_types", + "continuous_types", + "discrete_types", + "typefilter", + "isgenerator", +] + +bool_types = set(["int8"]) + +int_types = set( + ["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"] +) +float_types = set(["float32", "float64"]) +complex_types = set(["complex64", "complex128"]) continuous_types = float_types | complex_types discrete_types = bool_types | int_types @@ -34,5 +35,4 @@ def typefilter(vars, types): def isgenerator(obj): - return ((hasattr(obj, '__next__') and six.PY3) or - (hasattr(obj, 'next') and six.PY2)) + return (hasattr(obj, "__next__") and six.PY3) or (hasattr(obj, "next") and six.PY2)