diff --git a/pymc3/__init__.py b/pymc3/__init__.py
index d2fe3bc39d..1214ec9739 100644
--- a/pymc3/__init__.py
+++ b/pymc3/__init__.py
@@ -5,7 +5,15 @@
 from .distributions import *
 from .glm import *
 from . import gp
-from .math import logaddexp, logsumexp, logit, invlogit, expand_packed_triangular, probit, invprobit
+from .math import (
+    logaddexp,
+    logsumexp,
+    logit,
+    invlogit,
+    expand_packed_triangular,
+    probit,
+    invprobit,
+)
 from .model import *
 from .model_graph import model_to_graphviz
 from .stats import *
@@ -28,7 +36,8 @@
 from .data import *
 
 import logging
-_log = logging.getLogger('pymc3')
+
+_log = logging.getLogger("pymc3")
 if not logging.root.handlers:
     _log.setLevel(logging.INFO)
     handler = logging.StreamHandler()
diff --git a/pymc3/backends/__init__.py b/pymc3/backends/__init__.py
index 95a179b780..d3b9561df5 100644
--- a/pymc3/backends/__init__.py
+++ b/pymc3/backends/__init__.py
@@ -122,9 +122,8 @@
 from ..backends.sqlite import SQLite
 from ..backends.hdf5 import HDF5
 
-_shortcuts = {'text': {'backend': Text,
-                       'name': 'mcmc'},
-              'sqlite': {'backend': SQLite,
-                         'name': 'mcmc.sqlite'},
-              'hdf5': {'backend': HDF5,
-                       'name': 'mcmc.hdf5'}}
+_shortcuts = {
+    "text": {"backend": Text, "name": "mcmc"},
+    "sqlite": {"backend": SQLite, "name": "mcmc.sqlite"},
+    "hdf5": {"backend": HDF5, "name": "mcmc.hdf5"},
+}
diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py
index 1efb55a3e2..d08673f62e 100644
--- a/pymc3/backends/base.py
+++ b/pymc3/backends/base.py
@@ -13,7 +13,7 @@
 from ..model import modelcontext
 from .report import SamplerReport, merge_reports
 
-logger = logging.getLogger('pymc3')
+logger = logging.getLogger("pymc3")
 
 
 class BackendError(Exception):
@@ -58,10 +58,8 @@ def __init__(self, name, model=None, vars=None, test_point=None):
             test_point_.update(test_point)
             test_point = test_point_
         var_values = list(zip(self.varnames, self.fn(test_point)))
-        self.var_shapes = {var: value.shape
-                           for var, value in var_values}
-        self.var_dtypes = {var: value.dtype
-                           for var, value in var_values}
+        self.var_shapes = {var: value.shape for var, value in var_values}
+        self.var_dtypes = {var: value.dtype for var, value in var_values}
         self.chain = None
         self._is_base_setup = False
         self.sampler_vars = None
@@ -87,8 +85,9 @@ def _set_sampler_vars(self, sampler_vars):
         for stats in sampler_vars:
             for key, dtype in stats.items():
                 if dtypes.setdefault(key, dtype) != dtype:
-                    raise ValueError("Sampler statistic %s appears with "
-                                     "different types." % key)
+                    raise ValueError(
+                        "Sampler statistic %s appears with " "different types." % key
+                    )
 
         self.sampler_vars = sampler_vars
 
@@ -137,7 +136,7 @@ def __getitem__(self, idx):
         try:
             return self.point(int(idx))
         except (ValueError, TypeError):  # Passed variable or variable name.
-            raise ValueError('Can only index with slice or integer')
+            raise ValueError("Can only index with slice or integer")
 
     def __len__(self):
         raise NotImplementedError
@@ -181,13 +180,14 @@ def get_sampler_stats(self, varname, sampler_idx=None, burn=0, thin=1):
         if sampler_idx is not None:
             return self._get_sampler_stats(varname, sampler_idx, burn, thin)
 
-        sampler_idxs = [i for i, s in enumerate(self.sampler_vars)
-                        if varname in s]
+        sampler_idxs = [i for i, s in enumerate(self.sampler_vars) if varname in s]
         if not sampler_idxs:
             raise KeyError("Unknown sampler stat %s" % varname)
 
-        vals = np.stack([self._get_sampler_stats(varname, i, burn, thin)
-                         for i in sampler_idxs], axis=-1)
+        vals = np.stack(
+            [self._get_sampler_stats(varname, i, burn, thin) for i in sampler_idxs],
+            axis=-1,
+        )
         if vals.shape[-1] == 1:
             return vals[..., 0]
         else:
@@ -267,13 +267,14 @@ def __init__(self, straces):
 
         self._report = SamplerReport()
         for strace in straces:
-            if hasattr(strace, '_warnings'):
+            if hasattr(strace, "_warnings"):
                 self._report._add_warnings(strace._warnings, strace.chain)
 
     def __repr__(self):
-        template = '<{}: {} chains, {} iterations, {} variables>'
-        return template.format(self.__class__.__name__,
-                               self.nchains, len(self), len(self.varnames))
+        template = "<{}: {} chains, {} iterations, {} variables>"
+        return template.format(
+            self.__class__.__name__, self.nchains, len(self), len(self.varnames)
+        )
 
     @property
     def nchains(self):
@@ -310,16 +311,26 @@ def __getitem__(self, idx):
         var = str(var)
         if var in self.varnames:
             if var in self.stat_names:
-                warnings.warn("Attribute access on a trace object is ambigous. "
-                              "Sampler statistic and model variable share a name. Use "
-                              "trace.get_values or trace.get_sampler_stats.")
+                warnings.warn(
+                    "Attribute access on a trace object is ambiguous. "
+                    "Sampler statistic and model variable share a name. Use "
+                    "trace.get_values or trace.get_sampler_stats."
+                )
             return self.get_values(var, burn=burn, thin=thin)
         if var in self.stat_names:
             return self.get_sampler_stats(var, burn=burn, thin=thin)
         raise KeyError("Unknown variable %s" % var)
 
-    _attrs = set(['_straces', 'varnames', 'chains', 'stat_names',
-                  'supports_sampler_stats', '_report'])
+    _attrs = set(
+        [
+            "_straces",
+            "varnames",
+            "chains",
+            "stat_names",
+            "supports_sampler_stats",
+            "_report",
+        ]
+    )
 
     def __getattr__(self, name):
         # Avoid infinite recursion when called before __init__
@@ -330,14 +341,17 @@ def __getattr__(self, name):
         name = str(name)
         if name in self.varnames:
             if name in self.stat_names:
-                warnings.warn("Attribute access on a trace object is ambigous. "
-                              "Sampler statistic and model variable share a name. Use "
-                              "trace.get_values or trace.get_sampler_stats.")
+                warnings.warn(
+                    "Attribute access on a trace object is ambiguous. "
+                    "Sampler statistic and model variable share a name. Use "
+                    "trace.get_values or trace.get_sampler_stats."
+                )
             return self.get_values(name)
         if name in self.stat_names:
             return self.get_sampler_stats(name)
-        raise AttributeError("'{}' object has no attribute '{}'".format(
-            type(self).__name__, name))
+        raise AttributeError(
+            "'{}' object has no attribute '{}'".format(type(self).__name__, name)
+        )
 
     def __len__(self):
         chain = self.chains[-1]
@@ -392,10 +406,12 @@ def add_values(self, vals, overwrite=False):
             l_samples = len(self) * len(self.chains)
             l_v = len(v)
             if l_v != l_samples:
-                warnings.warn("The length of the values you are trying to "
-                              "add ({}) does not match the number ({}) of "
-                              "total samples in the trace "
-                              "(chains * iterations)".format(l_v, l_samples))
+                warnings.warn(
+                    "The length of the values you are trying to "
+                    "add ({}) does not match the number ({}) of "
+                    "total samples in the trace "
+                    "(chains * iterations)".format(l_v, l_samples)
+                )
 
             v = np.squeeze(v.reshape(len(chains), len(self), -1))
 
@@ -424,8 +440,9 @@ def remove_values(self, name):
                     chain.vars.remove(va)
                     del chain.samples[name]
 
-    def get_values(self, varname, burn=0, thin=1, combine=True, chains=None,
-                   squeeze=True):
+    def get_values(
+        self, varname, burn=0, thin=1, combine=True, chains=None, squeeze=True
+    ):
         """Get values from traces.
 
         Parameters
@@ -452,14 +469,16 @@ def get_values(self, varname, burn=0, thin=1, combine=True, chains=None,
             chains = self.chains
         varname = str(varname)
         try:
-            results = [self._straces[chain].get_values(varname, burn, thin)
-                       for chain in chains]
+            results = [
+                self._straces[chain].get_values(varname, burn, thin) for chain in chains
+            ]
         except TypeError:  # Single chain passed.
             results = [self._straces[chains].get_values(varname, burn, thin)]
         return _squeeze_cat(results, combine, squeeze)
 
-    def get_sampler_stats(self, varname, burn=0, thin=1, combine=True,
-                          chains=None, squeeze=True):
+    def get_sampler_stats(
+        self, varname, burn=0, thin=1, combine=True, chains=None, squeeze=True
+    ):
         """Get sampler statistics from the trace.
 
         Parameters
@@ -487,8 +506,10 @@ def get_sampler_stats(self, varname, burn=0, thin=1, combine=True,
         except TypeError:
             chains = [chains]
 
-        results = [self._straces[chain].get_sampler_stats(varname, None, burn, thin)
-                   for chain in chains]
+        results = [
+            self._straces[chain].get_sampler_stats(varname, None, burn, thin)
+            for chain in chains
+        ]
         return _squeeze_cat(results, combine, squeeze)
 
     def _slice(self, slice):
diff --git a/pymc3/backends/hdf5.py b/pymc3/backends/hdf5.py
index 3d800bf199..5e732bbb54 100644
--- a/pymc3/backends/hdf5.py
+++ b/pymc3/backends/hdf5.py
@@ -2,6 +2,7 @@
 import h5py
 from contextlib import contextmanager
 
+
 @contextmanager
 def activator(instance):
     if isinstance(instance.hdf5_file, h5py.File):
@@ -9,7 +10,7 @@ def activator(instance):
             yield
             return
     # if file is closed/not referenced: open, do job, then close
-    instance.hdf5_file = h5py.File(instance.name, 'a')
+    instance.hdf5_file = h5py.File(instance.name, "a")
     yield
     instance.hdf5_file.close()
     return
@@ -50,21 +51,21 @@ def activate_file(self):
     @property
     def samples(self):
         g = self.hdf5_file.require_group(str(self.chain))
-        if 'name' not in g.attrs:
-            g.attrs['name'] = self.chain
-        return g.require_group('samples')
+        if "name" not in g.attrs:
+            g.attrs["name"] = self.chain
+        return g.require_group("samples")
 
     @property
     def stats(self):
         g = self.hdf5_file.require_group(str(self.chain))
-        if 'name' not in g.attrs:
-            g.attrs['name'] = self.chain
-        return g.require_group('stats')
+        if "name" not in g.attrs:
+            g.attrs["name"] = self.chain
+        return g.require_group("stats")
 
     @property
     def chains(self):
         with self.activate_file:
-            return [v.attrs['name'] for v in self.hdf5_file.values()]
+            return [v.attrs["name"] for v in self.hdf5_file.values()]
 
     @property
     def is_new_file(self):
@@ -84,19 +85,19 @@ def nchains(self):
     @property
     def records_stats(self):
         with self.activate_file:
-            return self.hdf5_file.attrs['records_stats']
+            return self.hdf5_file.attrs["records_stats"]
 
     @records_stats.setter
     def records_stats(self, v):
         with self.activate_file:
-            self.hdf5_file.attrs['records_stats'] = bool(v)
+            self.hdf5_file.attrs["records_stats"] = bool(v)
 
     def _resize(self, n):
         for v in self.samples.values():
             v.resize(n, axis=0)
         for key, group in self.stats.items():
             for statds in group.values():
-                statds.resize((n, ))
+                statds.resize((n,))
 
     @property
     def sampler_vars(self):
@@ -123,10 +124,15 @@ def sampler_vars(self, values):
                 if not data.keys():  # no pre-recorded stats
                     for varname, dtype in sampler.items():
                         if varname not in data:
-                            data.create_dataset(varname, (self.draws,), dtype=dtype, maxshape=(None,))
+                            data.create_dataset(
+                                varname, (self.draws,), dtype=dtype, maxshape=(None,)
+                            )
                 elif data.keys() != sampler.keys():
                     raise ValueError(
-                        "Sampler vars can't change, names incompatible: {} != {}".format(data.keys(), sampler.keys()))
+                        "Sampler vars can't change, names incompatible: {} != {}".format(
+                            data.keys(), sampler.keys()
+                        )
+                    )
             self.records_stats = True
 
     def setup(self, draws, chain, sampler_vars=None):
@@ -146,16 +152,18 @@ def setup(self, draws, chain, sampler_vars=None):
         with self.activate_file:
             for varname, shape in self.var_shapes.items():
                 if varname not in self.samples:
-                    self.samples.create_dataset(name=varname, shape=(draws, ) + shape,
-                                                dtype=self.var_dtypes[varname],
-                                                maxshape=(None, ) + shape)
+                    self.samples.create_dataset(
+                        name=varname,
+                        shape=(draws,) + shape,
+                        dtype=self.var_dtypes[varname],
+                        maxshape=(None,) + shape,
+                    )
             self.draw_idx = len(self)
             self.draws = self.draw_idx + draws
             self._set_sampler_vars(sampler_vars)
             self._is_base_setup = True
             self._resize(self.draws)
 
-
     def close(self):
         with self.activate_file:
             if self.draw_idx == self.draws:
@@ -190,8 +198,9 @@ def _slice(self, idx):
             start, stop, step = idx.indices(len(self))
             sliced = ndarray.NDArray(model=self.model, vars=self.vars)
             sliced.chain = self.chain
-            sliced.samples = {v: self.samples[v][start:stop:step]
-                              for v in self.varnames}
+            sliced.samples = {
+                v: self.samples[v][start:stop:step] for v in self.varnames
+            }
             sliced.draw_idx = (stop - start) // step
             return sliced
 
diff --git a/pymc3/backends/ndarray.py b/pymc3/backends/ndarray.py
index 1c57bb02dc..9c9610cbcc 100644
--- a/pymc3/backends/ndarray.py
+++ b/pymc3/backends/ndarray.py
@@ -35,7 +35,7 @@ def save_trace(trace, directory=None, overwrite=False):
     str, path to the directory where the trace was saved
     """
     if directory is None:
-        directory = '.pymc_{}.trace'
+        directory = ".pymc_{}.trace"
         idx = 1
         while os.path.exists(directory.format(idx)):
             idx += 1
@@ -45,8 +45,10 @@ def save_trace(trace, directory=None, overwrite=False):
         if overwrite:
             shutil.rmtree(directory)
         else:
-            raise OSError('Cautiously refusing to overwrite the already existing {}! Please supply '
-                          'a different directory, or set `overwrite=True`'.format(directory))
+            raise OSError(
+                "Cautiously refusing to overwrite the already existing {}! Please supply "
+                "a different directory, or set `overwrite=True`".format(directory)
+            )
     os.makedirs(directory)
 
     for chain, ndarray in trace._straces.items():
@@ -72,15 +74,15 @@ def load_trace(directory, model=None):
     pm.Multitrace that was saved in the directory
     """
     straces = []
-    for directory in glob.glob(os.path.join(directory, '*')):
+    for directory in glob.glob(os.path.join(directory, "*")):
         if os.path.isdir(directory):
             straces.append(SerializeNDArray(directory).load(model))
     return base.MultiTrace(straces)
 
 
 class SerializeNDArray(object):
-    metadata_file = 'metadata.json'
-    samples_file = 'samples.npz'
+    metadata_file = "metadata.json"
+    samples_file = "samples.npz"
 
     def __init__(self, directory):
         """Helper to save and load NDArray objects"""
@@ -99,10 +101,10 @@ def to_metadata(ndarray):
                 stats.append({key: value.tolist() for key, value in stat.items()})
 
         metadata = {
-            'draw_idx': ndarray.draw_idx,
-            'draws': ndarray.draws,
-            '_stats': stats,
-            'chain': ndarray.chain,
+            "draw_idx": ndarray.draw_idx,
+            "draws": ndarray.draws,
+            "_stats": stats,
+            "chain": ndarray.chain,
         }
         return metadata
 
@@ -114,14 +116,14 @@ def save(self, ndarray):
         to reload the multitrace.
         """
         if not isinstance(ndarray, NDArray):
-            raise TypeError('Can only save NDArray')
+            raise TypeError("Can only save NDArray")
 
         if os.path.isdir(self.directory):
             shutil.rmtree(self.directory)
 
         os.mkdir(self.directory)
 
-        with open(self.metadata_path, 'w') as buff:
+        with open(self.metadata_path, "w") as buff:
             json.dump(SerializeNDArray.to_metadata(ndarray), buff)
 
         np.savez_compressed(self.samples_path, **ndarray.samples)
@@ -129,10 +131,12 @@ def save(self, ndarray):
     def load(self, model):
         """Load the saved ndarray from file"""
         new_trace = NDArray(model=model)
-        with open(self.metadata_path, 'r') as buff:
+        with open(self.metadata_path, "r") as buff:
             metadata = json.load(buff)
 
-        metadata['_stats'] = [{k: np.array(v) for k, v in stat.items()} for stat in metadata['_stats']]
+        metadata["_stats"] = [
+            {k: np.array(v) for k, v in stat.items()} for stat in metadata["_stats"]
+        ]
 
         for key, value in metadata.items():
             setattr(new_trace, key, value)
@@ -187,16 +191,16 @@ def setup(self, draws, chain, sampler_vars=None):
             self.draw_idx = old_draws
             for varname, shape in self.var_shapes.items():
                 old_var_samples = self.samples[varname]
-                new_var_samples = np.zeros((draws, ) + shape,
-                                           self.var_dtypes[varname])
-                self.samples[varname] = np.concatenate((old_var_samples,
-                                                        new_var_samples),
-                                                       axis=0)
+                new_var_samples = np.zeros((draws,) + shape, self.var_dtypes[varname])
+                self.samples[varname] = np.concatenate(
+                    (old_var_samples, new_var_samples), axis=0
+                )
         else:  # Otherwise, make array of zeros for each variable.
             self.draws = draws
             for varname, shape in self.var_shapes.items():
-                self.samples[varname] = np.zeros((draws, ) + shape,
-                                                 dtype=self.var_dtypes[varname])
+                self.samples[varname] = np.zeros(
+                    (draws,) + shape, dtype=self.var_dtypes[varname]
+                )
 
         if sampler_vars is None:
             return
@@ -247,12 +251,14 @@ def close(self):
             return
         # Remove trailing zeros if interrupted before completed all
         # draws.
-        self.samples = {var: vtrace[:self.draw_idx]
-                        for var, vtrace in self.samples.items()}
+        self.samples = {
+            var: vtrace[: self.draw_idx] for var, vtrace in self.samples.items()
+        }
         if self._stats is not None:
             self._stats = [
-                {var: trace[:self.draw_idx] for var, trace in stats.items()}
-                for stats in self._stats]
+                {var: trace[: self.draw_idx] for var, trace in stats.items()}
+                for stats in self._stats
+            ]
 
     # Selection methods
 
@@ -286,8 +292,9 @@ def _slice(self, idx):
 
         sliced = NDArray(model=self.model, vars=self.vars)
         sliced.chain = self.chain
-        sliced.samples = {varname: values[idx]
-                          for varname, values in self.samples.items()}
+        sliced.samples = {
+            varname: values[idx] for varname, values in self.samples.items()
+        }
         sliced.sampler_vars = self.sampler_vars
         sliced.draw_idx = (idx.stop - idx.start) // idx.step
 
@@ -307,8 +314,7 @@ def point(self, idx):
         with variable names as keys.
         """
         idx = int(idx)
-        return {varname: values[idx]
-                for varname, values in self.samples.items()}
+        return {varname: values[idx] for varname, values in self.samples.items()}
 
 
 def _slice_as_ndarray(strace, idx):
@@ -316,16 +322,20 @@ def _slice_as_ndarray(strace, idx):
     sliced.chain = strace.chain
 
     # Happy path where we do not need to load everything from the trace
-    if ((idx.step is None or idx.step >= 1) and
-            (idx.stop is None or idx.stop == len(strace))):
+    if (idx.step is None or idx.step >= 1) and (
+        idx.stop is None or idx.stop == len(strace)
+    ):
         start, stop, step = idx.indices(len(strace))
-        sliced.samples = {v: strace.get_values(v, burn=idx.start, thin=idx.step)
-                          for v in strace.varnames}
+        sliced.samples = {
+            v: strace.get_values(v, burn=idx.start, thin=idx.step)
+            for v in strace.varnames
+        }
         sliced.draw_idx = (stop - start) // step
     else:
         start, stop, step = idx.indices(len(strace))
-        sliced.samples = {v: strace.get_values(v)[start:stop:step]
-                          for v in strace.varnames}
+        sliced.samples = {
+            v: strace.get_values(v)[start:stop:step] for v in strace.varnames
+        }
         sliced.draw_idx = (stop - start) // step
 
     return sliced
diff --git a/pymc3/backends/report.py b/pymc3/backends/report.py
index f2b81d8761..77ff1f3425 100644
--- a/pymc3/backends/report.py
+++ b/pymc3/backends/report.py
@@ -4,7 +4,7 @@
 from ..util import is_transformed_name, get_untransformed_name
 
 
-logger = logging.getLogger('pymc3')
+logger = logging.getLogger("pymc3")
 
 
 @enum.unique
@@ -22,16 +22,16 @@ class WarningType(enum.Enum):
 
 
 SamplerWarning = namedtuple(
-    'SamplerWarning',
-    "kind, message, level, step, exec_info, extra")
+    "SamplerWarning", "kind, message, level, step, exec_info, extra"
+)
 
 
 _LEVELS = {
-    'info': logging.INFO,
-    'error': logging.ERROR,
-    'warn': logging.WARN,
-    'debug': logging.DEBUG,
-    'critical': logging.CRITICAL,
+    "info": logging.INFO,
+    "error": logging.ERROR,
+    "warn": logging.WARN,
+    "debug": logging.DEBUG,
+    "critical": logging.CRITICAL,
 }
 
 
@@ -50,21 +50,22 @@ def _warnings(self):
     @property
     def ok(self):
         """Whether the automatic convergence checks found serious problems."""
-        return all(_LEVELS[warn.level] < _LEVELS['warn']
-                   for warn in self._warnings)
+        return all(_LEVELS[warn.level] < _LEVELS["warn"] for warn in self._warnings)
 
-    def raise_ok(self, level='error'):
-        errors = [warn for warn in self._warnings
-                  if _LEVELS[warn.level] >= _LEVELS[level]]
+    def raise_ok(self, level="error"):
+        errors = [
+            warn for warn in self._warnings if _LEVELS[warn.level] >= _LEVELS[level]
+        ]
         if errors:
-            raise ValueError('Serious convergence issues during sampling.')
+            raise ValueError("Serious convergence issues during sampling.")
 
     def _run_convergence_checks(self, trace, model):
         if trace.nchains == 1:
-            msg = ("Only one chain was sampled, this makes it impossible to "
-                   "run some convergence checks")
-            warn = SamplerWarning(WarningType.BAD_PARAMS, msg, 'info',
-                                  None, None, None)
+            msg = (
+                "Only one chain was sampled, this makes it impossible to "
+                "run some convergence checks"
+            )
+            warn = SamplerWarning(WarningType.BAD_PARAMS, msg, "info", None, None, None)
             self._add_warnings([warn])
             return
 
@@ -86,44 +87,61 @@ def _run_convergence_checks(self, trace, model):
         warnings = []
         rhat_max = max(val.max() for val in gelman_rubin.values())
         if rhat_max > 1.4:
-            msg = ("The gelman-rubin statistic is larger than 1.4 for some "
-                   "parameters. The sampler did not converge.")
+            msg = (
+                "The gelman-rubin statistic is larger than 1.4 for some "
+                "parameters. The sampler did not converge."
+            )
             warn = SamplerWarning(
-                WarningType.CONVERGENCE, msg, 'error', None, None, gelman_rubin)
+                WarningType.CONVERGENCE, msg, "error", None, None, gelman_rubin
+            )
             warnings.append(warn)
         elif rhat_max > 1.2:
-            msg = ("The gelman-rubin statistic is larger than 1.2 for some "
-                   "parameters.")
+            msg = (
+                "The gelman-rubin statistic is larger than 1.2 for some " "parameters."
+            )
             warn = SamplerWarning(
-                WarningType.CONVERGENCE, msg, 'warn', None, None, gelman_rubin)
+                WarningType.CONVERGENCE, msg, "warn", None, None, gelman_rubin
+            )
             warnings.append(warn)
         elif rhat_max > 1.05:
-            msg = ("The gelman-rubin statistic is larger than 1.05 for some "
-                   "parameters. This indicates slight problems during "
-                   "sampling.")
+            msg = (
+                "The gelman-rubin statistic is larger than 1.05 for some "
+                "parameters. This indicates slight problems during "
+                "sampling."
+            )
             warn = SamplerWarning(
-                WarningType.CONVERGENCE, msg, 'info', None, None, gelman_rubin)
+                WarningType.CONVERGENCE, msg, "info", None, None, gelman_rubin
+            )
             warnings.append(warn)
 
         eff_min = min(val.min() for val in effective_n.values())
         n_samples = len(trace) * trace.nchains
         if eff_min < 200 and n_samples >= 500:
-            msg = ("The estimated number of effective samples is smaller than "
-                   "200 for some parameters.")
+            msg = (
+                "The estimated number of effective samples is smaller than "
+                "200 for some parameters."
+            )
             warn = SamplerWarning(
-                WarningType.CONVERGENCE, msg, 'error', None, None, effective_n)
+                WarningType.CONVERGENCE, msg, "error", None, None, effective_n
+            )
             warnings.append(warn)
         elif eff_min / n_samples < 0.1:
-            msg = ("The number of effective samples is smaller than "
-                   "10% for some parameters.")
+            msg = (
+                "The number of effective samples is smaller than "
+                "10% for some parameters."
+            )
             warn = SamplerWarning(
-                WarningType.CONVERGENCE, msg, 'warn', None, None, effective_n)
+                WarningType.CONVERGENCE, msg, "warn", None, None, effective_n
+            )
             warnings.append(warn)
         elif eff_min / n_samples < 0.25:
-            msg = ("The number of effective samples is smaller than "
-                   "25% for some parameters.")
+            msg = (
+                "The number of effective samples is smaller than "
+                "25% for some parameters."
+            )
             warn = SamplerWarning(
-                WarningType.CONVERGENCE, msg, 'info', None, None, effective_n)
+                WarningType.CONVERGENCE, msg, "info", None, None, effective_n
+            )
             warnings.append(warn)
 
         self._add_warnings(warnings)
@@ -136,7 +154,6 @@ def _add_warnings(self, warnings, chain=None):
         warn_list.extend(warnings)
 
     def _log_summary(self):
-
         def log_warning(warn):
             level = _LEVELS[warn.level]
             logger.log(level, warn.message)
@@ -155,17 +172,14 @@ def filter_warns(warnings):
             for warn in warnings:
                 if warn.step is None:
                     filtered.append(warn)
-                elif (start <= warn.step < stop and
-                        (warn.step - start) % step == 0):
+                elif start <= warn.step < stop and (warn.step - start) % step == 0:
                     warn = warn._replace(step=warn.step - start)
                     filtered.append(warn)
             return filtered
 
         report._add_warnings(filter_warns(self._global_warnings))
         for chain in self._chain_warnings:
-            report._add_warnings(
-                filter_warns(self._chain_warnings[chain]),
-                chain)
+            report._add_warnings(filter_warns(self._chain_warnings[chain]), chain)
 
         return report
 
diff --git a/pymc3/backends/sqlite.py b/pymc3/backends/sqlite.py
index e0f0ff74a0..7c8b93981c 100644
--- a/pymc3/backends/sqlite.py
+++ b/pymc3/backends/sqlite.py
@@ -23,35 +23,42 @@
 from . import tracetab as ttab
 
 TEMPLATES = {
-    'table':            ('CREATE TABLE IF NOT EXISTS [{table}] '
-                         '(recid INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, '
-                         'draw INTEGER, chain INT(5), '
-                         '{value_cols})'),
-    'insert':           ('INSERT INTO [{table}] '
-                         '(recid, draw, chain, {value_cols}) '
-                         'VALUES (NULL, ?, ?, {values})'),
-    'max_draw':         ('SELECT MAX(draw) FROM [{table}] '
-                         'WHERE chain = ?'),
-    'draw_count':       ('SELECT COUNT(*) FROM [{table}] '
-                         'WHERE chain = ?'),
+    "table": (
+        "CREATE TABLE IF NOT EXISTS [{table}] "
+        "(recid INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "
+        "draw INTEGER, chain INT(5), "
+        "{value_cols})"
+    ),
+    "insert": (
+        "INSERT INTO [{table}] "
+        "(recid, draw, chain, {value_cols}) "
+        "VALUES (NULL, ?, ?, {values})"
+    ),
+    "max_draw": ("SELECT MAX(draw) FROM [{table}] " "WHERE chain = ?"),
+    "draw_count": ("SELECT COUNT(*) FROM [{table}] " "WHERE chain = ?"),
     # Named placeholders are used in the selection templates because
     # some values occur more than once in the same template.
-    'select':           ('SELECT * FROM [{table}] '
-                         'WHERE (chain = :chain)'),
-    'select_burn':      ('SELECT * FROM [{table}] '
-                         'WHERE (chain = :chain) AND (draw > :burn)'),
-    'select_thin':      ('SELECT * FROM [{table}] '
-                         'WHERE (chain = :chain) AND '
-                         '(draw - (SELECT draw FROM [{table}] '
-                         'WHERE chain = :chain '
-                         'ORDER BY draw LIMIT 1)) % :thin = 0'),
-    'select_burn_thin': ('SELECT * FROM [{table}] '
-                         'WHERE (chain = :chain) AND (draw > :burn) '
-                         'AND (draw - (SELECT draw FROM [{table}] '
-                         'WHERE (chain = :chain) AND (draw > :burn) '
-                         'ORDER BY draw LIMIT 1)) % :thin = 0'),
-    'select_point':     ('SELECT * FROM [{table}] '
-                         'WHERE (chain = :chain) AND (draw = :draw)'),
+    "select": ("SELECT * FROM [{table}] " "WHERE (chain = :chain)"),
+    "select_burn": (
+        "SELECT * FROM [{table}] " "WHERE (chain = :chain) AND (draw > :burn)"
+    ),
+    "select_thin": (
+        "SELECT * FROM [{table}] "
+        "WHERE (chain = :chain) AND "
+        "(draw - (SELECT draw FROM [{table}] "
+        "WHERE chain = :chain "
+        "ORDER BY draw LIMIT 1)) % :thin = 0"
+    ),
+    "select_burn_thin": (
+        "SELECT * FROM [{table}] "
+        "WHERE (chain = :chain) AND (draw > :burn) "
+        "AND (draw - (SELECT draw FROM [{table}] "
+        "WHERE (chain = :chain) AND (draw > :burn) "
+        "ORDER BY draw LIMIT 1)) % :thin = 0"
+    ),
+    "select_point": (
+        "SELECT * FROM [{table}] " "WHERE (chain = :chain) AND (draw = :draw)"
+    ),
 }
 
 sqlite3.register_adapter(np.int32, int)
@@ -108,35 +115,36 @@ def setup(self, draws, chain):
             self.draw_idx = self._get_max_draw(chain) + 1
             self._len = None
         else:  # Table has not been created.
-            self._var_cols = {varname: ttab.create_flat_names('v', shape)
-                              for varname, shape in self.var_shapes.items()}
+            self._var_cols = {
+                varname: ttab.create_flat_names("v", shape)
+                for varname, shape in self.var_shapes.items()
+            }
             self._create_table()
             self._is_setup = True
         self._create_insert_queries()
         self._closed = False
 
     def _create_table(self):
-        template = TEMPLATES['table']
+        template = TEMPLATES["table"]
         with self.db.con:
             for varname, var_cols in self._var_cols.items():
                 if np.issubdtype(self.var_dtypes[varname], np.integer):
-                    dtype = 'INT'
+                    dtype = "INT"
                 else:
-                    dtype = 'FLOAT'
-                colnames = ', '.join([v + ' ' + dtype for v in var_cols])
-                statement = template.format(table=varname,
-                                            value_cols=colnames)
+                    dtype = "FLOAT"
+                colnames = ", ".join([v + " " + dtype for v in var_cols])
+                statement = template.format(table=varname, value_cols=colnames)
                 self.db.cursor.execute(statement)
 
     def _create_insert_queries(self):
-        template = TEMPLATES['insert']
+        template = TEMPLATES["insert"]
         for varname, var_cols in self._var_cols.items():
             # Create insert statement for each variable.
-            var_str = ', '.join(var_cols)
-            placeholders = ', '.join(['?'] * len(var_cols))
-            statement = template.format(table=varname,
-                                        value_cols=var_str,
-                                        values=placeholders)
+            var_str = ", ".join(var_cols)
+            placeholders = ", ".join(["?"] * len(var_cols))
+            statement = template.format(
+                table=varname, value_cols=var_str, values=placeholders
+            )
             self.var_inserts[varname] = statement
 
     def record(self, point):
@@ -160,8 +168,9 @@ def _execute_queue(self):
             for varname in self.varnames:
                 if not self._queue[varname]:
                     continue
-                self.db.cursor.executemany(self.var_inserts[varname],
-                                           self._queue[varname])
+                self.db.cursor.executemany(
+                    self.var_inserts[varname], self._queue[varname]
+                )
                 self._queue[varname] = []
 
     def close(self):
@@ -182,7 +191,7 @@ def __len__(self):
 
     def _get_number_draws(self):
         self.db.connect()
-        statement = TEMPLATES['draw_count'].format(table=self.varnames[0])
+        statement = TEMPLATES["draw_count"].format(table=self.varnames[0])
         self.db.cursor.execute(statement, (self.chain,))
         counts = self.db.cursor.fetchall()[0][0]
         if counts is None:
@@ -192,8 +201,8 @@ def _get_number_draws(self):
 
     def _get_max_draw(self, chain):
         self.db.connect()
-        statement = TEMPLATES['max_draw'].format(table=self.varnames[0])
-        self.db.cursor.execute(statement, (chain, ))
+        statement = TEMPLATES["max_draw"].format(table=self.varnames[0])
+        self.db.cursor.execute(statement, (chain,))
         counts = self.db.cursor.fetchall()[0][0]
         if counts is None:
             return 0
@@ -221,23 +230,24 @@ def get_values(self, varname, burn=0, thin=1):
         if burn < 0:
             burn = max(0, len(self) + burn)
         if thin < 1:
-            raise ValueError('Only positive thin values are supported '
-                             'in SQLite backend.')
+            raise ValueError(
+                "Only positive thin values are supported " "in SQLite backend."
+            )
         varname = str(varname)
 
-        statement_args = {'chain': self.chain}
+        statement_args = {"chain": self.chain}
         if burn == 0 and thin == 1:
-            action = 'select'
+            action = "select"
         elif thin == 1:
-            action = 'select_burn'
-            statement_args['burn'] = burn - 1
+            action = "select_burn"
+            statement_args["burn"] = burn - 1
         elif burn == 0:
-            action = 'select_thin'
-            statement_args['thin'] = thin
+            action = "select_thin"
+            statement_args["thin"] = thin
         else:
-            action = 'select_burn_thin'
-            statement_args['burn'] = burn - 1
-            statement_args['thin'] = thin
+            action = "select_burn_thin"
+            statement_args["burn"] = burn - 1
+            statement_args["thin"] = thin
 
         self.db.connect()
         shape = (-1,) + self.var_shapes[varname]
@@ -248,7 +258,7 @@ def get_values(self, varname, burn=0, thin=1):
 
     def _slice(self, idx):
         if idx.stop is not None:
-            raise ValueError('Stop value in slice not supported.')
+            raise ValueError("Stop value in slice not supported.")
         return ndarray._slice_as_ndarray(self, idx)
 
     def point(self, idx):
@@ -258,20 +268,18 @@ def point(self, idx):
         idx = int(idx)
         if idx < 0:
             idx = self._get_max_draw(self.chain) + idx + 1
-        statement = TEMPLATES['select_point']
+        statement = TEMPLATES["select_point"]
         self.db.connect()
         var_values = {}
-        statement_args = {'chain': self.chain, 'draw': idx}
+        statement_args = {"chain": self.chain, "draw": idx}
         for varname in self.varnames:
-            self.db.cursor.execute(statement.format(table=varname),
-                                   statement_args)
+            self.db.cursor.execute(statement.format(table=varname), statement_args)
             values = _rows_to_ndarray(self.db.cursor)
             var_values[varname] = values.reshape(self.var_shapes[varname])
         return var_values
 
 
 class _SQLiteDB(object):
-
     def __init__(self, name):
         self.name = name
         self.con = None
@@ -312,16 +320,17 @@ def load(name, model=None):
     db.connect()
     varnames = _get_table_list(db.cursor)
     if len(varnames) == 0:
-        raise ValueError(('Can not get variable list for database'
-                          '`{}`'.format(name)))
+        raise ValueError("Can not get variable list for database `{}`".format(name))
     chains = _get_chain_list(db.cursor, varnames[0])
 
     straces = []
     for chain in chains:
         strace = SQLite(name, model=model)
         strace.chain = chain
-        strace._var_cols = {varname: ttab.create_flat_names('v', shape)
-                            for varname, shape in strace.var_shapes.items()}
+        strace._var_cols = {
+            varname: ttab.create_flat_names("v", shape)
+            for varname, shape in strace.var_shapes.items()
+        }
         strace._is_setup = True
         strace.db = db  # Share the db with all traces.
         straces.append(strace)
@@ -332,21 +341,23 @@ def _get_table_list(cursor):
     """Return a list of table names in the current database."""
     # Modified from Django. Skips the sqlite_sequence system table used
     # for autoincrement key generation.
-    cursor.execute("SELECT name FROM sqlite_master "
-                   "WHERE type='table' AND NOT name='sqlite_sequence' "
-                   "ORDER BY name")
+    cursor.execute(
+        "SELECT name FROM sqlite_master "
+        "WHERE type='table' AND NOT name='sqlite_sequence' "
+        "ORDER BY name"
+    )
     return [row[0] for row in cursor.fetchall()]
 
 
 def _get_var_strs(cursor, varname):
-    cursor.execute('SELECT * FROM [{}]'.format(varname))
+    cursor.execute("SELECT * FROM [{}]".format(varname))
     col_names = (col_descr[0] for col_descr in cursor.description)
-    return [name for name in col_names if name.startswith('v')]
+    return [name for name in col_names if name.startswith("v")]
 
 
 def _get_chain_list(cursor, varname):
     """Return a list of sorted chains for `varname`."""
-    cursor.execute('SELECT DISTINCT chain FROM [{}]'.format(varname))
+    cursor.execute("SELECT DISTINCT chain FROM [{}]".format(varname))
     chains = [chain[0] for chain in cursor.fetchall()]
     chains.sort()
     return chains
diff --git a/pymc3/backends/text.py b/pymc3/backends/text.py
index 027a748e31..b9b5de373a 100644
--- a/pymc3/backends/text.py
+++ b/pymc3/backends/text.py
@@ -45,8 +45,9 @@ def __init__(self, name, model=None, vars=None, test_point=None):
             os.mkdir(name)
         super(Text, self).__init__(name, model, vars, test_point)
 
-        self.flat_names = {v: ttab.create_flat_names(v, shape)
-                           for v, shape in self.var_shapes.items()}
+        self.flat_names = {
+            v: ttab.create_flat_names(v, shape) for v, shape in self.var_shapes.items()
+        }
 
         self.filename = None
         self._fh = None
@@ -68,21 +69,22 @@ def setup(self, draws, chain):
             self._fh.close()
 
         self.chain = chain
-        self.filename = os.path.join(self.name, 'chain-{}.csv'.format(chain))
+        self.filename = os.path.join(self.name, "chain-{}.csv".format(chain))
 
         cnames = [fv for v in self.varnames for fv in self.flat_names[v]]
 
         if os.path.exists(self.filename):
             with open(self.filename) as fh:
-                prev_cnames = next(fh).strip().split(',')
+                prev_cnames = next(fh).strip().split(",")
             if prev_cnames != cnames:
                 raise base.BackendError(
                     "Previous file '{}' has different variables names "
-                    "than current model.".format(self.filename))
-            self._fh = open(self.filename, 'a')
+                    "than current model.".format(self.filename)
+                )
+            self._fh = open(self.filename, "a")
         else:
-            self._fh = open(self.filename, 'w')
-            self._fh.write(','.join(cnames) + '\n')
+            self._fh = open(self.filename, "w")
+            self._fh.write(",".join(cnames) + "\n")
 
     def record(self, point):
         """Record results of a sampling iteration.
@@ -96,7 +98,7 @@ def record(self, point):
         for varname, value in zip(self.varnames, self.fn(point)):
             vals[varname] = value.ravel()
         columns = [str(val) for var in self.varnames for val in vals[var]]
-        self._fh.write(','.join(columns) + '\n')
+        self._fh.write(",".join(columns) + "\n")
 
     def close(self):
         if self._fh is not None:
@@ -112,7 +114,6 @@ def _load_df(self):
                 if "float" in str(dtype):
                     self.df[key] = floatX(self.df[key])
 
-
     def __len__(self):
         if self.filename is None:
             return 0
@@ -140,7 +141,7 @@ def get_values(self, varname, burn=0, thin=1):
 
     def _slice(self, idx):
         if idx.stop is not None:
-            raise ValueError('Stop value in slice not supported.')
+            raise ValueError("Stop value in slice not supported.")
         return ndarray._slice_as_ndarray(self, idx)
 
     def point(self, idx):
@@ -170,14 +171,14 @@ def load(name, model=None):
     -------
     A MultiTrace instance
     """
-    files = glob(os.path.join(name, 'chain-*.csv'))
+    files = glob(os.path.join(name, "chain-*.csv"))
 
     if len(files) == 0:
-        raise ValueError('No files present in directory {}'.format(name))
+        raise ValueError("No files present in directory {}".format(name))
 
     straces = []
     for f in files:
-        chain = int(os.path.splitext(f)[0].rsplit('-', 1)[1])
+        chain = int(os.path.splitext(f)[0].rsplit("-", 1)[1])
         strace = Text(name, model=model)
         strace.chain = chain
         strace.filename = f
@@ -203,7 +204,6 @@ def dump(name, trace, chains=None):
         chains = trace.chains
 
     for chain in chains:
-        filename = os.path.join(name, 'chain-{}.csv'.format(chain))
-        df = ttab.trace_to_dataframe(
-            trace, chains=chain, include_transformed=True)
+        filename = os.path.join(name, "chain-{}.csv".format(chain))
+        df = ttab.trace_to_dataframe(trace, chains=chain, include_transformed=True)
         df.to_csv(filename, index=False)
diff --git a/pymc3/backends/tracetab.py b/pymc3/backends/tracetab.py
index 39b5dc4ffa..c481cf2d55 100644
--- a/pymc3/backends/tracetab.py
+++ b/pymc3/backends/tracetab.py
@@ -6,7 +6,7 @@
 
 from ..util import get_default_varnames
 
-__all__ = ['trace_to_dataframe']
+__all__ = ["trace_to_dataframe"]
 
 
 def trace_to_dataframe(trace, chains=None, varnames=None, include_transformed=False):
@@ -28,8 +28,9 @@ def trace_to_dataframe(trace, chains=None, varnames=None, include_transformed=Fa
     var_shapes = trace._straces[0].var_shapes
 
     if varnames is None:
-        varnames = get_default_varnames(var_shapes.keys(),
-                                        include_transformed=include_transformed)
+        varnames = get_default_varnames(
+            var_shapes.keys(), include_transformed=include_transformed
+        )
 
     flat_names = {v: create_flat_names(v, var_shapes[v]) for v in varnames}
 
@@ -56,13 +57,13 @@ def create_flat_names(varname, shape):
         return [varname]
     labels = (np.ravel(xs).tolist() for xs in np.indices(shape))
     labels = (map(str, xs) for xs in labels)
-    return ['{}__{}'.format(varname, '_'.join(idxs)) for idxs in zip(*labels)]
+    return ["{}__{}".format(varname, "_".join(idxs)) for idxs in zip(*labels)]
 
 
 def _create_shape(flat_names):
     """Determine shape from `create_flat_names` output."""
     try:
-        _, shape_str = flat_names[-1].rsplit('__', 1)
+        _, shape_str = flat_names[-1].rsplit("__", 1)
     except ValueError:
         return ()
-    return tuple(int(i) + 1 for i in shape_str.split('_'))
+    return tuple(int(i) + 1 for i in shape_str.split("_"))
diff --git a/pymc3/blocking.py b/pymc3/blocking.py
index f95856229e..3b4bb35540 100644
--- a/pymc3/blocking.py
+++ b/pymc3/blocking.py
@@ -7,10 +7,10 @@
 import numpy as np
 import collections
 
-__all__ = ['ArrayOrdering', 'DictToArrayBijection', 'DictToVarBijection']
+__all__ = ["ArrayOrdering", "DictToArrayBijection", "DictToVarBijection"]
 
-VarMap = collections.namedtuple('VarMap', 'var, slc, shp, dtyp')
-DataMap = collections.namedtuple('DataMap', 'list_ind, slc, shp, dtype, name')
+VarMap = collections.namedtuple("VarMap", "var, slc, shp, dtyp")
+DataMap = collections.namedtuple("DataMap", "list_ind, slc, shp, dtype, name")
 
 
 # TODO Classes and methods need to be fully documented.
@@ -29,11 +29,11 @@ def __init__(self, vars):
         for var in vars:
             name = var.name
             if name is None:
-                raise ValueError('Unnamed variable in ArrayOrdering.')
+                raise ValueError("Unnamed variable in ArrayOrdering.")
             if name in self.by_name:
-                raise ValueError('Name of variable not unique: %s.' % name)
-            if not hasattr(var, 'dshape') or not hasattr(var, 'dsize'):
-                raise ValueError('Shape of variable not known %s' % name)
+                raise ValueError("Name of variable not unique: %s." % name)
+            if not hasattr(var, "dshape") or not hasattr(var, "dsize"):
+                raise ValueError("Shape of variable not known %s" % name)
 
             slc = slice(self.size, self.size + var.dsize)
             varmap = VarMap(name, slc, var.dshape, var.dtype)
@@ -55,12 +55,12 @@ def __init__(self, ordering, dpoint):
         self.dpt = dpoint
 
         # determine smallest float dtype that will fit all data
-        if all([x.dtyp == 'float16' for x in ordering.vmap]):
-            self.array_dtype = 'float16'
-        elif all([x.dtyp == 'float32' for x in ordering.vmap]):
-            self.array_dtype = 'float32'
+        if all([x.dtyp == "float16" for x in ordering.vmap]):
+            self.array_dtype = "float16"
+        elif all([x.dtyp == "float32" for x in ordering.vmap]):
+            self.array_dtype = "float32"
         else:
-            self.array_dtype = 'float64'
+            self.array_dtype = "float64"
 
     def map(self, dpt):
         """
@@ -119,22 +119,23 @@ class ListArrayOrdering(object):
         defining the input type 'tensor' or 'numpy'
     """
 
-    def __init__(self, list_arrays, intype='numpy'):
-        if intype not in {'tensor', 'numpy'}:
+    def __init__(self, list_arrays, intype="numpy"):
+        if intype not in {"tensor", "numpy"}:
             raise ValueError("intype not in {'tensor', 'numpy'}")
         self.vmap = []
         self.intype = intype
         self.size = 0
         for array in list_arrays:
-            if self.intype == 'tensor':
+            if self.intype == "tensor":
                 name = array.name
                 array = array.tag.test_value
             else:
-                name = 'numpy'
+                name = "numpy"
 
             slc = slice(self.size, self.size + array.size)
-            self.vmap.append(DataMap(
-                len(self.vmap), slc, array.shape, array.dtype, name))
+            self.vmap.append(
+                DataMap(len(self.vmap), slc, array.shape, array.dtype, name)
+            )
             self.size += array.size
 
 
@@ -211,8 +212,7 @@ def rmap(self, array):
         a_list = copy.copy(self.list_arrays)
 
         for list_ind, slc, shp, dtype, _ in self.ordering.vmap:
-            a_list[list_ind] = np.atleast_1d(
-                                    array)[slc].reshape(shp).astype(dtype)
+            a_list[list_ind] = np.atleast_1d(array)[slc].reshape(shp).astype(dtype)
 
         return a_list
 
diff --git a/pymc3/data.py b/pymc3/data.py
index c01784edcc..9bd7cbfec8 100644
--- a/pymc3/data.py
+++ b/pymc3/data.py
@@ -8,12 +8,7 @@
 import theano.tensor as tt
 import theano
 
-__all__ = [
-    'get_data',
-    'GeneratorAdapter',
-    'Minibatch',
-    'align_minibatches'
-]
+__all__ = ["get_data", "GeneratorAdapter", "Minibatch", "align_minibatches"]
 
 
 def get_data(filename):
@@ -27,8 +22,8 @@ def get_data(filename):
     -------
     BytesIO of the data
     """
-    data_pkg = 'pymc3.examples'
-    return io.BytesIO(pkgutil.get_data(data_pkg, os.path.join('data', filename)))
+    data_pkg = "pymc3.examples"
+    return io.BytesIO(pkgutil.get_data(data_pkg, os.path.join("data", filename)))
 
 
 class GenTensorVariable(tt.TensorVariable):
@@ -61,14 +56,14 @@ def make_variable(self, gop, name=None):
 
     def __init__(self, generator):
         if not pm.vartypes.isgenerator(generator):
-            raise TypeError('Object should be generator like')
+            raise TypeError("Object should be generator like")
         self.test_value = pm.smartfloatX(copy(next(generator)))
         # make pickling potentially possible
         self._yielded_test_value = False
         self.gen = generator
         self.tensortype = tt.TensorType(
-            self.test_value.dtype,
-            ((False, ) * self.test_value.ndim))
+            self.test_value.dtype, ((False,) * self.test_value.ndim)
+        )
 
     # python3 generator
     def __next__(self):
@@ -225,9 +220,18 @@ class Minibatch(tt.TensorVariable):
 
     RNG = collections.defaultdict(list)
 
-    @theano.configparser.change_flags(compute_test_value='raise')
-    def __init__(self, data, batch_size=128, dtype=None, broadcastable=None, name='Minibatch',
-                 random_seed=42, update_shared_f=None, in_memory_size=None):
+    @theano.configparser.change_flags(compute_test_value="raise")
+    def __init__(
+        self,
+        data,
+        batch_size=128,
+        dtype=None,
+        broadcastable=None,
+        name="Minibatch",
+        random_seed=42,
+        update_shared_f=None,
+        in_memory_size=None,
+    ):
         if dtype is None:
             data = pm.smartfloatX(np.asarray(data))
         else:
@@ -235,17 +239,16 @@ def __init__(self, data, batch_size=128, dtype=None, broadcastable=None, name='M
         in_memory_slc = self.make_static_slices(in_memory_size)
         self.shared = theano.shared(data[in_memory_slc])
         self.update_shared_f = update_shared_f
-        self.random_slc = self.make_random_slices(self.shared.shape, batch_size, random_seed)
+        self.random_slc = self.make_random_slices(
+            self.shared.shape, batch_size, random_seed
+        )
         minibatch = self.shared[self.random_slc]
         if broadcastable is None:
-            broadcastable = (False, ) * minibatch.ndim
+            broadcastable = (False,) * minibatch.ndim
         minibatch = tt.patternbroadcast(minibatch, broadcastable)
         self.minibatch = minibatch
-        super(Minibatch, self).__init__(
-            self.minibatch.type, None, None, name=name)
-        theano.Apply(
-            theano.compile.view_op,
-            inputs=[self.minibatch], outputs=[self])
+        super(Minibatch, self).__init__(self.minibatch.type, None, None, name=name)
+        theano.Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self])
         self.tag.test_value = copy(self.minibatch.tag.test_value)
 
     def rslice(self, total, size, seed):
@@ -254,11 +257,11 @@ def rslice(self, total, size, seed):
         elif isinstance(size, int):
             rng = pm.tt_rng(seed)
             Minibatch.RNG[id(self)].append(rng)
-            return (rng
-                    .uniform(size=(size, ), low=0.0, high=pm.floatX(total) - 1e-16)
-                    .astype('int64'))
+            return rng.uniform(
+                size=(size,), low=0.0, high=pm.floatX(total) - 1e-16
+            ).astype("int64")
         else:
-            raise TypeError('Unrecognized size type, %r' % size)
+            raise TypeError("Unrecognized size type, %r" % size)
 
     def __del__(self):
         del Minibatch.RNG[id(self)]
@@ -281,10 +284,10 @@ def make_static_slices(user_size):
                 elif isinstance(i, slice):
                     slc.append(i)
                 else:
-                    raise TypeError('Unrecognized size type, %r' % user_size)
+                    raise TypeError("Unrecognized size type, %r" % user_size)
             return slc
         else:
-            raise TypeError('Unrecognized size type, %r' % user_size)
+            raise TypeError("Unrecognized size type, %r" % user_size)
 
     def make_random_slices(self, in_memory_shape, batch_size, default_random_seed):
         if batch_size is None:
@@ -292,6 +295,7 @@ def make_random_slices(self, in_memory_shape, batch_size, default_random_seed):
         elif isinstance(batch_size, int):
             slc = [self.rslice(in_memory_shape[0], batch_size, default_random_seed)]
         elif isinstance(batch_size, (list, tuple)):
+
             def check(t):
                 if t is Ellipsis or t is None:
                     return True
@@ -305,12 +309,14 @@ def check(t):
                         return True
                     else:
                         return False
+
             # end check definition
             if not all(check(t) for t in batch_size):
-                raise TypeError('Unrecognized `batch_size` type, expected '
-                                'int or List[int|tuple(size, random_seed)] where '
-                                'size and random seed are both ints, got %r' %
-                                batch_size)
+                raise TypeError(
+                    "Unrecognized `batch_size` type, expected "
+                    "int or List[int|tuple(size, random_seed)] where "
+                    "size and random seed are both ints, got %r" % batch_size
+                )
             batch_size = [
                 (i, default_random_seed) if isinstance(i, int) else i
                 for i in batch_size
@@ -319,12 +325,14 @@ def check(t):
             if Ellipsis in batch_size:
                 sep = batch_size.index(Ellipsis)
                 begin = batch_size[:sep]
-                end = batch_size[sep + 1:]
+                end = batch_size[sep + 1 :]
                 if Ellipsis in end:
-                    raise ValueError('Double Ellipsis in `batch_size` is restricted, got %r' %
-                                     batch_size)
+                    raise ValueError(
+                        "Double Ellipsis in `batch_size` is restricted, got %r"
+                        % batch_size
+                    )
                 if len(end) > 0:
-                    shp_mid = shape[sep:-len(end)]
+                    shp_mid = shape[sep : -len(end)]
                     mid = [tt.arange(s) for s in shp_mid]
                 else:
                     mid = []
@@ -333,24 +341,31 @@ def check(t):
                 end = []
                 mid = []
             if (len(begin) + len(end)) > len(in_memory_shape.eval()):
-                raise ValueError('Length of `batch_size` is too big, '
-                                 'number of ints is bigger that ndim, got %r'
-                                 % batch_size)
+                raise ValueError(
+                    "Length of `batch_size` is too big, "
+                    "number of ints is bigger than ndim, got %r" % batch_size
+                )
             if len(end) > 0:
-                shp_end = shape[-len(end):]
+                shp_end = shape[-len(end) :]
             else:
                 shp_end = np.asarray([])
-            shp_begin = shape[:len(begin)]
-            slc_begin = [self.rslice(shp_begin[i], t[0], t[1])
-                         if t is not None else tt.arange(shp_begin[i])
-                         for i, t in enumerate(begin)]
-            slc_end = [self.rslice(shp_end[i], t[0], t[1])
-                       if t is not None else tt.arange(shp_end[i])
-                       for i, t in enumerate(end)]
+            shp_begin = shape[: len(begin)]
+            slc_begin = [
+                self.rslice(shp_begin[i], t[0], t[1])
+                if t is not None
+                else tt.arange(shp_begin[i])
+                for i, t in enumerate(begin)
+            ]
+            slc_end = [
+                self.rslice(shp_end[i], t[0], t[1])
+                if t is not None
+                else tt.arange(shp_end[i])
+                for i, t in enumerate(end)
+            ]
             slc = slc_begin + mid + slc_end
             slc = slc
         else:
-            raise TypeError('Unrecognized size type, %r' % batch_size)
+            raise TypeError("Unrecognized size type, %r" % batch_size)
         return pm.theanof.ix_(*slc)
 
     def update_shared(self):
@@ -376,6 +391,6 @@ def align_minibatches(batches=None):
     else:
         for b in batches:
             if not isinstance(b, Minibatch):
-                raise TypeError('{b} is not a Minibatch')
+                raise TypeError("{b} is not a Minibatch".format(b=b))
             for rng in Minibatch.RNG[id(b)]:
                 rng.seed()
diff --git a/pymc3/diagnostics.py b/pymc3/diagnostics.py
index e44f27203a..4ae4fda450 100644
--- a/pymc3/diagnostics.py
+++ b/pymc3/diagnostics.py
@@ -5,12 +5,12 @@
 from .util import get_default_varnames
 from .backends.base import MultiTrace
 
-__all__ = ['geweke', 'gelman_rubin', 'effective_n']
+__all__ = ["geweke", "gelman_rubin", "effective_n"]
 
 
 @statfunc
-def geweke(x, first=.1, last=.5, intervals=20):
-    R"""Return z-scores for convergence diagnostics.
+def geweke(x, first=0.1, last=0.5, intervals=20):
+    r"""Return z-scores for convergence diagnostics.
 
     Compare the mean of the first % of series with the mean of the last % of
     series. x is divided into a number of segments for which this difference is
@@ -58,14 +58,12 @@ def geweke(x, first=.1, last=.5, intervals=20):
     for interval in (first, last):
         if interval <= 0 or interval >= 1:
             raise ValueError(
-                "Invalid intervals for Geweke convergence analysis",
-                (first,
-                 last))
+                "Invalid intervals for Geweke convergence analysis", (first, last)
+            )
     if first + last >= 1:
         raise ValueError(
-            "Invalid intervals for Geweke convergence analysis",
-            (first,
-             last))
+            "Invalid intervals for Geweke convergence analysis", (first, last)
+        )
 
     # Initialize list of z-scores
     zscores = []
@@ -77,14 +75,15 @@ def geweke(x, first=.1, last=.5, intervals=20):
     last_start_idx = (1 - last) * end
 
     # Calculate starting indices
-    start_indices = np.arange(0, int(last_start_idx), step=int(
-        (last_start_idx) / (intervals - 1)))
+    start_indices = np.arange(
+        0, int(last_start_idx), step=int((last_start_idx) / (intervals - 1))
+    )
 
     # Loop over start indices
     for start in start_indices:
         # Calculate slices
-        first_slice = x[start: start + int(first * (end - start))]
-        last_slice = x[int(end - last * (end - start)):]
+        first_slice = x[start : start + int(first * (end - start))]
+        last_slice = x[int(end - last * (end - start)) :]
 
         z = first_slice.mean() - last_slice.mean()
         z /= np.sqrt(first_slice.var() + last_slice.var())
@@ -98,7 +97,7 @@ def geweke(x, first=.1, last=.5, intervals=20):
 
 
 def gelman_rubin(mtrace, varnames=None, include_transformed=False):
-    R"""Returns estimate of R for a set of traces.
+    r"""Returns estimate of R for a set of traces.
 
     The Gelman-Rubin diagnostic tests for lack of convergence by comparing
     the variance between multiple chains to the variance within each chain.
@@ -160,11 +159,13 @@ def rscore(x, num_samples):
 
     if mtrace.nchains < 2:
         raise ValueError(
-            'Gelman-Rubin diagnostic requires multiple chains '
-            'of the same length.')
+            "Gelman-Rubin diagnostic requires multiple chains " "of the same length."
+        )
 
     if varnames is None:
-        varnames = get_default_varnames(mtrace.varnames, include_transformed=include_transformed)
+        varnames = get_default_varnames(
+            mtrace.varnames, include_transformed=include_transformed
+        )
 
     Rhat = {}
 
@@ -177,7 +178,7 @@ def rscore(x, num_samples):
 
 
 def effective_n(mtrace, varnames=None, include_transformed=False):
-    R"""Returns estimate of the effective sample size of a set of traces.
+    r"""Returns estimate of the effective sample size of a set of traces.
 
     Parameters
     ----------
@@ -221,23 +222,23 @@ def get_neff(x):
         acov = np.asarray([autocov(trace_value[chain]) for chain in range(nchain)])
 
         chain_mean = trace_value.mean(axis=1)
-        chain_var = acov[:, 0] * n_samples / (n_samples - 1.)
-        acov_t = acov[:, 1] * n_samples / (n_samples - 1.)
+        chain_var = acov[:, 0] * n_samples / (n_samples - 1.0)
+        acov_t = acov[:, 1] * n_samples / (n_samples - 1.0)
         mean_var = np.mean(chain_var)
-        var_plus = mean_var * (n_samples - 1.) / n_samples
+        var_plus = mean_var * (n_samples - 1.0) / n_samples
         var_plus += np.var(chain_mean, ddof=1)
 
         rho_hat_t = np.zeros(n_samples)
-        rho_hat_even = 1.
+        rho_hat_even = 1.0
         rho_hat_t[0] = rho_hat_even
-        rho_hat_odd = 1. - (mean_var - np.mean(acov_t)) / var_plus
+        rho_hat_odd = 1.0 - (mean_var - np.mean(acov_t)) / var_plus
         rho_hat_t[1] = rho_hat_odd
         # Geyer's initial positive sequence
         max_t = 1
         t = 1
-        while t < (n_samples - 2) and (rho_hat_even + rho_hat_odd) >= 0.:
-            rho_hat_even = 1. - (mean_var - np.mean(acov[:, t + 1])) / var_plus
-            rho_hat_odd = 1. - (mean_var - np.mean(acov[:, t + 2])) / var_plus
+        while t < (n_samples - 2) and (rho_hat_even + rho_hat_odd) >= 0.0:
+            rho_hat_even = 1.0 - (mean_var - np.mean(acov[:, t + 1])) / var_plus
+            rho_hat_odd = 1.0 - (mean_var - np.mean(acov[:, t + 2])) / var_plus
             if (rho_hat_even + rho_hat_odd) >= 0:
                 rho_hat_t[t + 1] = rho_hat_even
                 rho_hat_t[t + 2] = rho_hat_odd
@@ -247,12 +248,14 @@ def get_neff(x):
         # Geyer's initial monotone sequence
         t = 3
         while t <= max_t - 2:
-            if (rho_hat_t[t + 1] + rho_hat_t[t + 2]) > (rho_hat_t[t - 1] + rho_hat_t[t]):
-                rho_hat_t[t + 1] = (rho_hat_t[t - 1] + rho_hat_t[t]) / 2.
+            if (rho_hat_t[t + 1] + rho_hat_t[t + 2]) > (
+                rho_hat_t[t - 1] + rho_hat_t[t]
+            ):
+                rho_hat_t[t + 1] = (rho_hat_t[t - 1] + rho_hat_t[t]) / 2.0
                 rho_hat_t[t + 2] = rho_hat_t[t + 1]
             t += 2
         ess = nchain * n_samples
-        ess = ess / (-1. + 2. * np.sum(rho_hat_t))
+        ess = ess / (-1.0 + 2.0 * np.sum(rho_hat_t))
         return ess
 
     def generate_neff(trace_values):
@@ -288,11 +291,14 @@ def generate_neff(trace_values):
 
     if mtrace.nchains < 2:
         raise ValueError(
-            'Calculation of effective sample size requires multiple chains '
-            'of the same length.')
+            "Calculation of effective sample size requires multiple chains "
+            "of the same length."
+        )
 
     if varnames is None:
-        varnames = get_default_varnames(mtrace.varnames,include_transformed=include_transformed)
+        varnames = get_default_varnames(
+            mtrace.varnames, include_transformed=include_transformed
+        )
 
     n_eff = {}
 
diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index 3756a738ec..5787f88a8b 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -86,74 +86,75 @@
 
 from .bound import Bound
 
-__all__ = ['Uniform',
-           'Flat',
-           'HalfFlat',
-           'TruncatedNormal',
-           'Normal',
-           'Beta',
-           'Kumaraswamy',
-           'Exponential',
-           'Laplace',
-           'StudentT',
-           'Cauchy',
-           'HalfCauchy',
-           'Gamma',
-           'Weibull',
-           'Bound',
-           'Lognormal',
-           'HalfStudentT',
-           'ChiSquared',
-           'HalfNormal',
-           'Wald',
-           'Pareto',
-           'InverseGamma',
-           'ExGaussian',
-           'VonMises',
-           'Binomial',
-           'BetaBinomial',
-           'Bernoulli',
-           'Poisson',
-           'NegativeBinomial',
-           'ConstantDist',
-           'Constant',
-           'ZeroInflatedPoisson',
-           'ZeroInflatedNegativeBinomial',
-           'ZeroInflatedBinomial',
-           'DiscreteUniform',
-           'Geometric',
-           'Categorical',
-           'OrderedLogistic',
-           'DensityDist',
-           'Distribution',
-           'Continuous',
-           'Discrete',
-           'NoDistribution',
-           'TensorType',
-           'MvNormal',
-           'MatrixNormal',
-           'KroneckerNormal',
-           'MvStudentT',
-           'Dirichlet',
-           'Multinomial',
-           'Wishart',
-           'WishartBartlett',
-           'LKJCholeskyCov',
-           'LKJCorr',
-           'AR1',
-           'AR',
-           'GaussianRandomWalk',
-           'MvGaussianRandomWalk',
-           'MvStudentTRandomWalk',
-           'GARCH11',
-           'SkewNormal',
-           'Mixture',
-           'NormalMixture',
-           'Triangular',
-           'DiscreteWeibull',
-           'Gumbel',
-           'Logistic',
-           'LogitNormal',
-           'Interpolated',
-           'Bound',
-           ]
+__all__ = [
+    "Uniform",
+    "Flat",
+    "HalfFlat",
+    "TruncatedNormal",
+    "Normal",
+    "Beta",
+    "Kumaraswamy",
+    "Exponential",
+    "Laplace",
+    "StudentT",
+    "Cauchy",
+    "HalfCauchy",
+    "Gamma",
+    "Weibull",
+    "Bound",
+    "Lognormal",
+    "HalfStudentT",
+    "ChiSquared",
+    "HalfNormal",
+    "Wald",
+    "Pareto",
+    "InverseGamma",
+    "ExGaussian",
+    "VonMises",
+    "Binomial",
+    "BetaBinomial",
+    "Bernoulli",
+    "Poisson",
+    "NegativeBinomial",
+    "ConstantDist",
+    "Constant",
+    "ZeroInflatedPoisson",
+    "ZeroInflatedNegativeBinomial",
+    "ZeroInflatedBinomial",
+    "DiscreteUniform",
+    "Geometric",
+    "Categorical",
+    "OrderedLogistic",
+    "DensityDist",
+    "Distribution",
+    "Continuous",
+    "Discrete",
+    "NoDistribution",
+    "TensorType",
+    "MvNormal",
+    "MatrixNormal",
+    "KroneckerNormal",
+    "MvStudentT",
+    "Dirichlet",
+    "Multinomial",
+    "Wishart",
+    "WishartBartlett",
+    "LKJCholeskyCov",
+    "LKJCorr",
+    "AR1",
+    "AR",
+    "GaussianRandomWalk",
+    "MvGaussianRandomWalk",
+    "MvStudentTRandomWalk",
+    "GARCH11",
+    "SkewNormal",
+    "Mixture",
+    "NormalMixture",
+    "Triangular",
+    "DiscreteWeibull",
+    "Gumbel",
+    "Logistic",
+    "LogitNormal",
+    "Interpolated",
+    "Bound",
+]
diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py
index f2928c9702..192e773a10 100644
--- a/pymc3/distributions/bound.py
+++ b/pymc3/distributions/bound.py
@@ -5,11 +5,16 @@
 import theano
 
 from pymc3.distributions.distribution import (
-    Distribution, Discrete, Continuous, draw_values, generate_samples)
+    Distribution,
+    Discrete,
+    Continuous,
+    draw_values,
+    generate_samples,
+)
 from pymc3.distributions import transforms
 from pymc3.distributions.dist_math import bound
 
-__all__ = ['Bound']
+__all__ = ["Bound"]
 
 
 class _Bounded(Distribution):
@@ -23,7 +28,7 @@ def __init__(self, distribution, lower, upper, default, *args, **kwargs):
             for name in defaults:
                 setattr(self, name, getattr(self._wrapped, name))
         else:
-            defaults = ('_default',)
+            defaults = ("_default",)
             self._default = default
 
         super(_Bounded, self).__init__(
@@ -31,7 +36,8 @@ def __init__(self, distribution, lower, upper, default, *args, **kwargs):
             dtype=self._wrapped.dtype,
             testval=self._wrapped.testval,
             defaults=defaults,
-            transform=self._wrapped.transform)
+            transform=self._wrapped.transform,
+        )
 
     def logp(self, value):
         logp = self._wrapped.logp(value)
@@ -49,15 +55,17 @@ def _random(self, lower, upper, point=None, size=None):
         lower = np.asarray(lower)
         upper = np.asarray(upper)
         if lower.size > 1 or upper.size > 1:
-            raise ValueError('Drawing samples from distributions with '
-                             'array-valued bounds is not supported.')
+            raise ValueError(
+                "Drawing samples from distributions with "
+                "array-valued bounds is not supported."
+            )
         samples = np.zeros(size, dtype=self.dtype).flatten()
         i, n = 0, len(samples)
         while i < len(samples):
             sample = np.atleast_1d(self._wrapped.random(point=point, size=n))
 
             select = sample[np.logical_and(sample >= lower, sample <= upper)]
-            samples[i:(i + len(select))] = select[:]
+            samples[i : (i + len(select))] = select[:]
             i += len(select)
             n -= len(select)
         if size is not None:
@@ -70,28 +78,27 @@ def random(self, point=None, size=None):
             return self._wrapped.random(point=point, size=size)
         elif self.lower is not None and self.upper is not None:
             lower, upper = draw_values([self.lower, self.upper], point=point, size=size)
-            return generate_samples(self._random, lower, upper, point,
-                                    dist_shape=self.shape,
-                                    size=size)
+            return generate_samples(
+                self._random, lower, upper, point, dist_shape=self.shape, size=size
+            )
         elif self.lower is not None:
             lower = draw_values([self.lower], point=point, size=size)
-            return generate_samples(self._random, lower, np.inf, point,
-                                    dist_shape=self.shape,
-                                    size=size)
+            return generate_samples(
+                self._random, lower, np.inf, point, dist_shape=self.shape, size=size
+            )
         else:
             upper = draw_values([self.upper], point=point, size=size)
-            return generate_samples(self._random, -np.inf, upper, point,
-                                    dist_shape=self.shape,
-                                    size=size)
+            return generate_samples(
+                self._random, -np.inf, upper, point, dist_shape=self.shape, size=size
+            )
 
 
 class _DiscreteBounded(_Bounded, Discrete):
-    def __init__(self, distribution, lower, upper,
-                 transform='infer', *args, **kwargs):
-        if transform == 'infer':
+    def __init__(self, distribution, lower, upper, transform="infer", *args, **kwargs):
+        if transform == "infer":
             transform = None
         if transform is not None:
-            raise ValueError('Can not transform discrete variable.')
+            raise ValueError("Can not transform discrete variable.")
 
         if lower is None and upper is None:
             default = None
@@ -103,12 +110,17 @@ def __init__(self, distribution, lower, upper,
             default = lower + 1
 
         super(_DiscreteBounded, self).__init__(
-            distribution=distribution, lower=lower, upper=upper,
-            default=default, *args, **kwargs)
+            distribution=distribution,
+            lower=lower,
+            upper=upper,
+            default=default,
+            *args,
+            **kwargs
+        )
 
 
 class _ContinuousBounded(_Bounded, Continuous):
-    R"""
+    r"""
     An upper, lower or upper+lower bounded distribution
 
     Parameters
@@ -125,16 +137,15 @@ class _ContinuousBounded(_Bounded, Continuous):
         See pymc3.distributions.transforms for more information.
     """
 
-    def __init__(self, distribution, lower, upper,
-                 transform='infer', *args, **kwargs):
-        dtype = kwargs.get('dtype', theano.config.floatX)
+    def __init__(self, distribution, lower, upper, transform="infer", *args, **kwargs):
+        dtype = kwargs.get("dtype", theano.config.floatX)
 
         if lower is not None:
             lower = tt.as_tensor_variable(lower).astype(dtype)
         if upper is not None:
             upper = tt.as_tensor_variable(upper).astype(dtype)
 
-        if transform == 'infer':
+        if transform == "infer":
             if lower is None and upper is None:
                 transform = None
                 default = None
@@ -151,12 +162,18 @@ def __init__(self, distribution, lower, upper,
             default = None
 
         super(_ContinuousBounded, self).__init__(
-            distribution=distribution, lower=lower, upper=upper,
-            transform=transform, default=default, *args, **kwargs)
+            distribution=distribution,
+            lower=lower,
+            upper=upper,
+            transform=transform,
+            default=default,
+            *args,
+            **kwargs
+        )
 
 
 class Bound(object):
-    R"""
+    r"""
     Create a Bound variable object that can be applied to create
     a new upper, lower, or upper and lower bounded distribution.
 
@@ -207,30 +224,35 @@ def __init__(self, distribution, lower=None, upper=None):
         self.upper = upper
 
     def __call__(self, name, *args, **kwargs):
-        if 'observed' in kwargs:
-            raise ValueError('Observed Bound distributions are not supported. '
-                             'If you want to model truncated data '
-                             'you can use a pm.Potential in combination '
-                             'with the cumulative probability function. See '
-                             'pymc3/examples/censored_data.py for an example.')
+        if "observed" in kwargs:
+            raise ValueError(
+                "Observed Bound distributions are not supported. "
+                "If you want to model truncated data "
+                "you can use a pm.Potential in combination "
+                "with the cumulative probability function. See "
+                "pymc3/examples/censored_data.py for an example."
+            )
 
         if issubclass(self.distribution, Continuous):
-            return _ContinuousBounded(name, self.distribution,
-                                      self.lower, self.upper, *args, **kwargs)
+            return _ContinuousBounded(
+                name, self.distribution, self.lower, self.upper, *args, **kwargs
+            )
         elif issubclass(self.distribution, Discrete):
-            return _DiscreteBounded(name, self.distribution,
-                                    self.lower, self.upper, *args, **kwargs)
+            return _DiscreteBounded(
+                name, self.distribution, self.lower, self.upper, *args, **kwargs
+            )
         else:
-            raise ValueError(
-                'Distribution is neither continuous nor discrete.')
+            raise ValueError("Distribution is neither continuous nor discrete.")
 
     def dist(self, *args, **kwargs):
         if issubclass(self.distribution, Continuous):
             return _ContinuousBounded.dist(
-                self.distribution, self.lower, self.upper, *args, **kwargs)
+                self.distribution, self.lower, self.upper, *args, **kwargs
+            )
 
         elif issubclass(self.distribution, Discrete):
             return _DiscreteBounded.dist(
-                self.distribution, self.lower, self.upper, *args, **kwargs)
+                self.distribution, self.lower, self.upper, *args, **kwargs
+            )
         else:
-            raise ValueError('Distribution is neither continuous nor discrete.')
+            raise ValueError("Distribution is neither continuous nor discrete.")
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index 984ca3e577..bb66eeceaa 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -20,45 +20,78 @@
 from .special import log_i0
 from ..math import invlogit, logit, logdiffexp
 from .dist_math import (
-    alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
-    normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
+    alltrue_elemwise,
+    betaln,
+    bound,
+    gammaln,
+    i0e,
+    incomplete_beta,
+    logpow,
+    normal_lccdf,
+    normal_lcdf,
+    SplineWrapper,
+    std_cdf,
+    zvalue,
 )
 from .distribution import Continuous, draw_values, generate_samples
 
-__all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'TruncatedNormal', 'Beta',
-           'Kumaraswamy', 'Exponential', 'Laplace', 'StudentT', 'Cauchy',
-           'HalfCauchy', 'Gamma', 'Weibull', 'HalfStudentT', 'Lognormal',
-           'ChiSquared', 'HalfNormal', 'Wald', 'Pareto', 'InverseGamma',
-           'ExGaussian', 'VonMises', 'SkewNormal', 'Triangular', 'Gumbel',
-           'Logistic', 'LogitNormal', 'Interpolated', 'Rice']
+__all__ = [
+    "Uniform",
+    "Flat",
+    "HalfFlat",
+    "Normal",
+    "TruncatedNormal",
+    "Beta",
+    "Kumaraswamy",
+    "Exponential",
+    "Laplace",
+    "StudentT",
+    "Cauchy",
+    "HalfCauchy",
+    "Gamma",
+    "Weibull",
+    "HalfStudentT",
+    "Lognormal",
+    "ChiSquared",
+    "HalfNormal",
+    "Wald",
+    "Pareto",
+    "InverseGamma",
+    "ExGaussian",
+    "VonMises",
+    "SkewNormal",
+    "Triangular",
+    "Gumbel",
+    "Logistic",
+    "LogitNormal",
+    "Interpolated",
+    "Rice",
+]
 
 
 class PositiveContinuous(Continuous):
     """Base class for positive continuous distributions"""
 
     def __init__(self, transform=transforms.log, *args, **kwargs):
-        super(PositiveContinuous, self).__init__(
-            transform=transform, *args, **kwargs)
+        super(PositiveContinuous, self).__init__(transform=transform, *args, **kwargs)
 
 
 class UnitContinuous(Continuous):
     """Base class for continuous distributions on [0,1]"""
 
     def __init__(self, transform=transforms.logodds, *args, **kwargs):
-        super(UnitContinuous, self).__init__(
-            transform=transform, *args, **kwargs)
+        super(UnitContinuous, self).__init__(transform=transform, *args, **kwargs)
 
 
 class BoundedContinuous(Continuous):
     """Base class for bounded continuous distributions"""
 
-    def __init__(self, transform='auto', lower=None, upper=None,
-                 *args, **kwargs):
+    def __init__(self, transform="auto", lower=None, upper=None, *args, **kwargs):
 
         lower = tt.as_tensor_variable(lower) if lower is not None else None
         upper = tt.as_tensor_variable(upper) if upper is not None else None
 
-        if transform == 'auto':
+        if transform == "auto":
             if lower is None and upper is None:
                 transform = None
             elif lower is not None and upper is None:
@@ -68,8 +101,7 @@ def __init__(self, transform='auto', lower=None, upper=None,
             else:
                 transform = transforms.interval(lower, upper)
 
-        super(BoundedContinuous, self).__init__(
-            transform=transform, *args, **kwargs)
+        super(BoundedContinuous, self).__init__(transform=transform, *args, **kwargs)
 
 
 def assert_negative_support(var, label, distname, value=-1e-6):
@@ -78,8 +110,9 @@ def assert_negative_support(var, label, distname, value=-1e-6):
         return
     try:
         # Transformed distribution
-        support = np.isfinite(var.transformed.distribution.dist
-                              .logp(value).tag.test_value)
+        support = np.isfinite(
+            var.transformed.distribution.dist.logp(value).tag.test_value
+        )
     except AttributeError:
         try:
             # Untransformed distribution
@@ -90,7 +123,8 @@ def assert_negative_support(var, label, distname, value=-1e-6):
 
     if np.any(support):
         msg = "The variable specified for {0} has negative support for {1}, ".format(
-            label, distname)
+            label, distname
+        )
         msg += "likely making it unsuitable for this parameter."
         warnings.warn(msg)
 
@@ -118,27 +152,27 @@ def get_tau_sd(tau=None, sd=None):
     """
     if tau is None:
         if sd is None:
-            sd = 1.
-            tau = 1.
+            sd = 1.0
+            tau = 1.0
         else:
-            tau = sd**-2.
+            tau = sd ** -2.0
 
     else:
         if sd is not None:
             raise ValueError("Can't pass both tau and sd")
         else:
-            sd = tau**-.5
+            sd = tau ** -0.5
 
     # cast tau and sd to float in a way that works for both np.arrays
     # and pure python
-    tau = 1. * tau
-    sd = 1. * sd
+    tau = 1.0 * tau
+    sd = 1.0 * sd
 
     return floatX(tau), floatX(sd)
 
 
 class Uniform(BoundedContinuous):
-    R"""
+    r"""
     Continuous uniform log-likelihood.
 
     The pdf of this distribution is
@@ -182,11 +216,10 @@ class Uniform(BoundedContinuous):
     def __init__(self, lower=0, upper=1, *args, **kwargs):
         self.lower = lower = tt.as_tensor_variable(floatX(lower))
         self.upper = upper = tt.as_tensor_variable(floatX(upper))
-        self.mean = (upper + lower) / 2.
+        self.mean = (upper + lower) / 2.0
         self.median = self.mean
 
-        super(Uniform, self).__init__(
-            lower=lower, upper=upper, *args, **kwargs)
+        super(Uniform, self).__init__(lower=lower, upper=upper, *args, **kwargs)
 
     def random(self, point=None, size=None):
         """
@@ -206,12 +239,14 @@ def random(self, point=None, size=None):
         array
         """
 
-        lower, upper = draw_values([self.lower, self.upper],
-                                   point=point, size=size)
-        return generate_samples(stats.uniform.rvs, loc=lower,
-                                scale=upper - lower,
-                                dist_shape=self.shape,
-                                size=size)
+        lower, upper = draw_values([self.lower, self.upper], point=point, size=size)
+        return generate_samples(
+            stats.uniform.rvs,
+            loc=lower,
+            scale=upper - lower,
+            dist_shape=self.shape,
+            size=size,
+        )
 
     def logp(self, value):
         """
@@ -228,17 +263,17 @@ def logp(self, value):
         """
         lower = self.lower
         upper = self.upper
-        return bound(-tt.log(upper - lower),
-                     value >= lower, value <= upper)
+        return bound(-tt.log(upper - lower), value >= lower, value <= upper)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         lower = dist.lower
         upper = dist.upper
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Uniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$'.format(
-            name, get_variable_name(lower), get_variable_name(upper))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Uniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$".format(
+            name, get_variable_name(lower), get_variable_name(upper)
+        )
 
     def logcdf(self, value):
         return tt.switch(
@@ -247,9 +282,8 @@ def logcdf(self, value):
             tt.switch(
                 tt.eq(value, self.upper),
                 0,
-                tt.log((value - self.lower)) -
-                tt.log((self.upper - self.lower))
-            )
+                tt.log((value - self.lower)) - tt.log((self.upper - self.lower)),
+            ),
         )
 
 
@@ -261,7 +295,7 @@ class Flat(Continuous):
 
     def __init__(self, *args, **kwargs):
         self._default = 0
-        super(Flat, self).__init__(defaults=('_default',), *args, **kwargs)
+        super(Flat, self).__init__(defaults=("_default",), *args, **kwargs)
 
     def random(self, point=None, size=None):
         """Raises ValueError as it is not possible to sample from Flat distribution
@@ -275,7 +309,7 @@ def random(self, point=None, size=None):
         -------
         ValueError
         """
-        raise ValueError('Cannot sample from Flat distribution')
+        raise ValueError("Cannot sample from Flat distribution")
 
     def logp(self, value):
         """
@@ -294,18 +328,14 @@ def logp(self, value):
         return tt.zeros_like(value)
 
     def _repr_latex_(self, name=None, dist=None):
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Flat}}()$'.format(name)
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Flat}}()$".format(name)
 
     def logcdf(self, value):
         return tt.switch(
             tt.eq(value, -np.inf),
             -np.inf,
-            tt.switch(
-                tt.eq(value, np.inf),
-                0,
-                tt.log(0.5)
-            )
+            tt.switch(tt.eq(value, np.inf), 0, tt.log(0.5)),
         )
 
 
@@ -314,7 +344,7 @@ class HalfFlat(PositiveContinuous):
 
     def __init__(self, *args, **kwargs):
         self._default = 1
-        super(HalfFlat, self).__init__(defaults=('_default',), *args, **kwargs)
+        super(HalfFlat, self).__init__(defaults=("_default",), *args, **kwargs)
 
     def random(self, point=None, size=None):
         """Raises ValueError as it is not possible to sample from HalfFlat distribution
@@ -328,7 +358,7 @@ def random(self, point=None, size=None):
         -------
         ValueError
         """
-        raise ValueError('Cannot sample from HalfFlat distribution')
+        raise ValueError("Cannot sample from HalfFlat distribution")
 
     def logp(self, value):
         """
@@ -347,23 +377,17 @@ def logp(self, value):
         return bound(tt.zeros_like(value), value > 0)
 
     def _repr_latex_(self, name=None, dist=None):
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{HalfFlat}}()$'.format(name)
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{HalfFlat}}()$".format(name)
 
     def logcdf(self, value):
         return tt.switch(
-            tt.lt(value, np.inf),
-            -np.inf,
-            tt.switch(
-                tt.eq(value, np.inf),
-                0,
-                -np.inf
-            )
+            tt.lt(value, np.inf), -np.inf, tt.switch(tt.eq(value, np.inf), 0, -np.inf)
         )
 
 
 class Normal(Continuous):
-    R"""
+    r"""
     Univariate normal log-likelihood.
 
     The pdf of this distribution is
@@ -431,10 +455,10 @@ def __init__(self, mu=0, sd=None, tau=None, **kwargs):
         self.tau = tt.as_tensor_variable(tau)
 
         self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu)
-        self.variance = 1. / self.tau
+        self.variance = 1.0 / self.tau
 
-        assert_negative_support(sd, 'sd', 'Normal')
-        assert_negative_support(tau, 'tau', 'Normal')
+        assert_negative_support(sd, "sd", "Normal")
+        assert_negative_support(tau, "tau", "Normal")
 
         super(Normal, self).__init__(**kwargs)
 
@@ -455,11 +479,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        mu, tau, _ = draw_values([self.mu, self.tau, self.sd],
-                                 point=point, size=size)
-        return generate_samples(stats.norm.rvs, loc=mu, scale=tau**-0.5,
-                                dist_shape=self.shape,
-                                size=size)
+        mu, tau, _ = draw_values([self.mu, self.tau, self.sd], point=point, size=size)
+        return generate_samples(
+            stats.norm.rvs, loc=mu, scale=tau ** -0.5, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -479,25 +502,26 @@ def logp(self, value):
         tau = self.tau
         mu = self.mu
 
-        return bound((-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2.,
-                     sd > 0)
+        return bound(
+            (-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0, sd > 0
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         sd = dist.sd
         mu = dist.mu
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Normal}}(\mathit{{mu}}={},~\mathit{{sd}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(sd))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Normal}}(\mathit{{mu}}={},~\mathit{{sd}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(sd)
+        )
 
     def logcdf(self, value):
         return normal_lcdf(self.mu, self.sd, value)
 
 
 class TruncatedNormal(BoundedContinuous):
-    R"""
+    r"""
     Univariate truncated normal log-likelihood.
 
     The pdf of this distribution is
@@ -570,8 +594,17 @@ class TruncatedNormal(BoundedContinuous):
 
     """
 
-    def __init__(self, mu=0, sd=None, tau=None, lower=None, upper=None,
-                 transform='auto', *args, **kwargs):
+    def __init__(
+        self,
+        mu=0,
+        sd=None,
+        tau=None,
+        lower=None,
+        upper=None,
+        transform="auto",
+        *args,
+        **kwargs
+    ):
         tau, sd = get_tau_sd(tau=tau, sd=sd)
         self.sd = tt.as_tensor_variable(sd)
         self.tau = tt.as_tensor_variable(tau)
@@ -582,18 +615,23 @@ def __init__(self, mu=0, sd=None, tau=None, lower=None, upper=None,
         if self.lower is None and self.upper is None:
             self._defaultval = mu
         elif self.lower is None and self.upper is not None:
-            self._defaultval = self.upper - 1.
+            self._defaultval = self.upper - 1.0
         elif self.lower is not None and self.upper is None:
-            self._defaultval = self.lower + 1.
+            self._defaultval = self.lower + 1.0
         else:
             self._defaultval = (self.lower + self.upper) / 2
 
-        assert_negative_support(sd, 'sd', 'TruncatedNormal')
-        assert_negative_support(tau, 'tau', 'TruncatedNormal')
+        assert_negative_support(sd, "sd", "TruncatedNormal")
+        assert_negative_support(tau, "tau", "TruncatedNormal")
 
         super(TruncatedNormal, self).__init__(
-            defaults=('_defaultval',), transform=transform,
-            lower=lower, upper=upper, *args, **kwargs)
+            defaults=("_defaultval",),
+            transform=transform,
+            lower=lower,
+            upper=upper,
+            *args,
+            **kwargs
+        )
 
     def random(self, point=None, size=None):
         """
@@ -613,15 +651,17 @@ def random(self, point=None, size=None):
         array
         """
         mu_v, std_v, a_v, b_v = draw_values(
-            [self.mu, self.sd, self.lower, self.upper], point=point, size=size)
-        return generate_samples(stats.truncnorm.rvs,
-                                a=(a_v - mu_v)/std_v,
-                                b=(b_v - mu_v) / std_v,
-                                loc=mu_v,
-                                scale=std_v,
-                                dist_shape=self.shape,
-                                size=size,
-                                )
+            [self.mu, self.sd, self.lower, self.upper], point=point, size=size
+        )
+        return generate_samples(
+            stats.truncnorm.rvs,
+            a=(a_v - mu_v) / std_v,
+            b=(b_v - mu_v) / std_v,
+            loc=mu_v,
+            scale=std_v,
+            dist_shape=self.shape,
+            size=size,
+        )
 
     def logp(self, value):
         """
@@ -654,7 +694,7 @@ def _normalization(self):
         mu, sd = self.mu, self.sd
 
         if self.lower is None and self.upper is None:
-            return 0.
+            return 0.0
 
         if self.lower is not None and self.upper is not None:
             lcdf_a = normal_lcdf(mu, sd, self.lower)
@@ -663,9 +703,7 @@ def _normalization(self):
             lsf_b = normal_lccdf(mu, sd, self.upper)
 
             return tt.switch(
-                self.lower > 0,
-                logdiffexp(lsf_a, lsf_b),
-                logdiffexp(lcdf_b, lcdf_a),
+                self.lower > 0, logdiffexp(lsf_a, lsf_b), logdiffexp(lcdf_b, lcdf_a)
             )
 
         if self.lower is not None:
@@ -676,11 +714,10 @@ def _normalization(self):
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
-        name = r'\text{%s}' % name
+        name = r"\text{%s}" % name
         return (
-            r'${} \sim \text{{TruncatedNormal}}('
-            '\mathit{{mu}}={},~\mathit{{sd}}={},a={},b={})$'
-            .format(
+            r"${} \sim \text{{TruncatedNormal}}("
+            "\mathit{{mu}}={},~\mathit{{sd}}={},a={},b={})$".format(
                 name,
                 get_variable_name(self.mu),
                 get_variable_name(self.sd),
@@ -691,7 +728,7 @@ def _repr_latex_(self, name=None, dist=None):
 
 
 class HalfNormal(PositiveContinuous):
-    R"""
+    r"""
     Half-normal log-likelihood.
 
     The pdf of this distribution is
@@ -761,10 +798,10 @@ def __init__(self, sd=None, tau=None, *args, **kwargs):
         self.tau = tau = tt.as_tensor_variable(tau)
 
         self.mean = tt.sqrt(2 / (np.pi * self.tau))
-        self.variance = (1. - 2 / np.pi) / self.tau
+        self.variance = (1.0 - 2 / np.pi) / self.tau
 
-        assert_negative_support(tau, 'tau', 'HalfNormal')
-        assert_negative_support(sd, 'sd', 'HalfNormal')
+        assert_negative_support(tau, "tau", "HalfNormal")
+        assert_negative_support(sd, "sd", "HalfNormal")
 
     def random(self, point=None, size=None):
         """
@@ -784,9 +821,9 @@ def random(self, point=None, size=None):
         array
         """
         sd = draw_values([self.sd], point=point)[0]
-        return generate_samples(stats.halfnorm.rvs, loc=0., scale=sd,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            stats.halfnorm.rvs, loc=0.0, scale=sd, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -804,30 +841,34 @@ def logp(self, value):
         """
         tau = self.tau
         sd = self.sd
-        return bound(-0.5 * tau * value**2 + 0.5 * tt.log(tau * 2. / np.pi),
-                     value >= 0,
-                     tau > 0, sd > 0)
+        return bound(
+            -0.5 * tau * value ** 2 + 0.5 * tt.log(tau * 2.0 / np.pi),
+            value >= 0,
+            tau > 0,
+            sd > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         sd = dist.sd
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{HalfNormal}}(\mathit{{sd}}={})$'.format(name,
-                                                                         get_variable_name(sd))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{HalfNormal}}(\mathit{{sd}}={})$".format(
+            name, get_variable_name(sd)
+        )
 
     def logcdf(self, value):
         sd = self.sd
         z = zvalue(value, mu=0, sd=sd)
         return tt.switch(
             tt.lt(z, -1.0),
-            tt.log(tt.erfcx(-z / tt.sqrt(2.))) - tt.sqr(z),
-            tt.log1p(-tt.erfc(z / tt.sqrt(2.)))
+            tt.log(tt.erfcx(-z / tt.sqrt(2.0))) - tt.sqr(z),
+            tt.log1p(-tt.erfc(z / tt.sqrt(2.0))),
         )
 
 
 class Wald(PositiveContinuous):
-    R"""
+    r"""
     Wald log-likelihood.
 
     The pdf of this distribution is
@@ -904,7 +945,7 @@ class Wald(PositiveContinuous):
        statmod: Probability Calculations for the Inverse Gaussian Distribution
     """
 
-    def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
+    def __init__(self, mu=None, lam=None, phi=None, alpha=0.0, *args, **kwargs):
         super(Wald, self).__init__(*args, **kwargs)
         mu, lam, phi = self.get_mu_lam_phi(mu, lam, phi)
         self.alpha = alpha = tt.as_tensor_variable(alpha)
@@ -913,13 +954,19 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
         self.phi = phi = tt.as_tensor_variable(phi)
 
         self.mean = self.mu + self.alpha
-        self.mode = self.mu * (tt.sqrt(1. + (1.5 * self.mu / self.lam)**2)
-                               - 1.5 * self.mu / self.lam) + self.alpha
-        self.variance = (self.mu**3) / self.lam
+        self.mode = (
+            self.mu
+            * (
+                tt.sqrt(1.0 + (1.5 * self.mu / self.lam) ** 2)
+                - 1.5 * self.mu / self.lam
+            )
+            + self.alpha
+        )
+        self.variance = (self.mu ** 3) / self.lam
 
-        assert_negative_support(phi, 'phi', 'Wald')
-        assert_negative_support(mu, 'mu', 'Wald')
-        assert_negative_support(lam, 'lam', 'Wald')
+        assert_negative_support(phi, "phi", "Wald")
+        assert_negative_support(mu, "mu", "Wald")
+        assert_negative_support(lam, "lam", "Wald")
 
     def get_mu_lam_phi(self, mu, lam, phi):
         if mu is None:
@@ -928,23 +975,28 @@ def get_mu_lam_phi(self, mu, lam, phi):
         else:
             if lam is None:
                 if phi is None:
-                    return mu, 1., 1. / mu
+                    return mu, 1.0, 1.0 / mu
                 else:
                     return mu, mu * phi, phi
             else:
                 if phi is None:
                     return mu, lam, lam / mu
 
-        raise ValueError('Wald distribution must specify either mu only, '
-                         'mu and lam, mu and phi, or lam and phi.')
+        raise ValueError(
+            "Wald distribution must specify either mu only, "
+            "mu and lam, mu and phi, or lam and phi."
+        )
 
     def _random(self, mu, lam, alpha, size=None):
-        v = np.random.normal(size=size)**2
-        value = (mu + (mu**2) * v / (2. * lam) - mu / (2. * lam)
-                 * np.sqrt(4. * mu * lam * v + (mu * v)**2))
+        v = np.random.normal(size=size) ** 2
+        value = (
+            mu
+            + (mu ** 2) * v / (2.0 * lam)
+            - mu / (2.0 * lam) * np.sqrt(4.0 * mu * lam * v + (mu * v) ** 2)
+        )
         z = np.random.uniform(size=size)
         i = np.floor(z - mu / (mu + value)) * 2 + 1
-        value = (value**-i) * (mu**(i + 1))
+        value = (value ** -i) * (mu ** (i + 1))
         return value + alpha
 
     def random(self, point=None, size=None):
@@ -964,12 +1016,12 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha],
-                                     point=point, size=size)
-        return generate_samples(self._random,
-                                mu, lam, alpha,
-                                dist_shape=self.shape,
-                                size=size)
+        mu, lam, alpha = draw_values(
+            [self.mu, self.lam, self.alpha], point=point, size=size
+        )
+        return generate_samples(
+            self._random, mu, lam, alpha, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -989,13 +1041,17 @@ def logp(self, value):
         lam = self.lam
         alpha = self.alpha
         # value *must* be iid. Otherwise this is wrong.
-        return bound(logpow(lam / (2. * np.pi), 0.5)
-                     - logpow(value - alpha, 1.5)
-                     - (0.5 * lam / (value - alpha)
-                        * ((value - alpha - mu) / mu)**2),
-                     # XXX these two are redundant. Please, check.
-                     value > 0, value - alpha > 0,
-                     mu > 0, lam > 0, alpha >= 0)
+        return bound(
+            logpow(lam / (2.0 * np.pi), 0.5)
+            - logpow(value - alpha, 1.5)
+            - (0.5 * lam / (value - alpha) * ((value - alpha - mu) / mu) ** 2),
+            # XXX these two are redundant. Please, check.
+            value > 0,
+            value - alpha > 0,
+            mu > 0,
+            lam > 0,
+            alpha >= 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -1003,11 +1059,13 @@ def _repr_latex_(self, name=None, dist=None):
         lam = dist.lam
         mu = dist.mu
         alpha = dist.alpha
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Wald}}(\mathit{{mu}}={},~\mathit{{lam}}={},~\mathit{{alpha}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(lam),
-                                                                get_variable_name(alpha))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Wald}}(\mathit{{mu}}={},~\mathit{{lam}}={},~\mathit{{alpha}}={})$".format(
+            name,
+            get_variable_name(mu),
+            get_variable_name(lam),
+            get_variable_name(alpha),
+        )
 
     def logcdf(self, value):
         # Distribution parameters
@@ -1020,38 +1078,35 @@ def logcdf(self, value):
         l = lam * mu
         r = tt.sqrt(value * lam)
 
-        a = normal_lcdf(0, 1, (q - 1.)/r)
-        b = 2./l + normal_lcdf(0, 1, -(q + 1.)/r)
+        a = normal_lcdf(0, 1, (q - 1.0) / r)
+        b = 2.0 / l + normal_lcdf(0, 1, -(q + 1.0) / r)
         return tt.switch(
             (
                 # Left limit
-                tt.lt(value, 0) |
-                (tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) |
-                (tt.lt(value, mu) & tt.eq(lam, 0))
+                tt.lt(value, 0)
+                | (tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf))
+                | (tt.lt(value, mu) & tt.eq(lam, 0))
             ),
             -np.inf,
             tt.switch(
                 (
                     # Right limit
-                    tt.eq(value, np.inf) |
-                    (tt.eq(lam, 0) & tt.gt(value, mu)) |
-                    (tt.gt(value, 0) & tt.eq(lam, np.inf)) |
+                    tt.eq(value, np.inf)
+                    | (tt.eq(lam, 0) & tt.gt(value, mu))
+                    | (tt.gt(value, 0) & tt.eq(lam, np.inf))
+                    |
                     # Degenerate distribution
-                    (
-                        tt.lt(mu, np.inf) &
-                        tt.eq(mu, value) &
-                        tt.eq(lam, 0)
-                    ) |
-                    (tt.eq(value, 0) & tt.eq(lam, np.inf))
+                    (tt.lt(mu, np.inf) & tt.eq(mu, value) & tt.eq(lam, 0))
+                    | (tt.eq(value, 0) & tt.eq(lam, np.inf))
                 ),
                 0,
-                a + tt.log1p(tt.exp(b - a))
-            )
+                a + tt.log1p(tt.exp(b - a)),
+            ),
         )
 
 
 class Beta(UnitContinuous):
-    R"""
+    r"""
     Beta log-likelihood.
 
     The pdf of this distribution is
@@ -1113,8 +1168,7 @@ class Beta(UnitContinuous):
     the binomial distribution.
     """
 
-    def __init__(self, alpha=None, beta=None, mu=None, sd=None,
-                 *args, **kwargs):
+    def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs):
         super(Beta, self).__init__(*args, **kwargs)
 
         alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd)
@@ -1122,22 +1176,27 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None,
         self.beta = beta = tt.as_tensor_variable(beta)
 
         self.mean = self.alpha / (self.alpha + self.beta)
-        self.variance = self.alpha * self.beta / (
-            (self.alpha + self.beta)**2 * (self.alpha + self.beta + 1))
+        self.variance = (
+            self.alpha
+            * self.beta
+            / ((self.alpha + self.beta) ** 2 * (self.alpha + self.beta + 1))
+        )
 
-        assert_negative_support(alpha, 'alpha', 'Beta')
-        assert_negative_support(beta, 'beta', 'Beta')
+        assert_negative_support(alpha, "alpha", "Beta")
+        assert_negative_support(beta, "beta", "Beta")
 
     def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None):
         if (alpha is not None) and (beta is not None):
             pass
         elif (mu is not None) and (sd is not None):
-            kappa = mu * (1 - mu) / sd**2 - 1
+            kappa = mu * (1 - mu) / sd ** 2 - 1
             alpha = mu * kappa
             beta = (1 - mu) * kappa
         else:
-            raise ValueError('Incompatible parameterization. Either use alpha '
-                             'and beta, or mu and sd to specify distribution.')
+            raise ValueError(
+                "Incompatible parameterization. Either use alpha "
+                "and beta, or mu and sd to specify distribution."
+            )
 
         return alpha, beta
 
@@ -1158,11 +1217,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        alpha, beta = draw_values([self.alpha, self.beta],
-                                  point=point, size=size)
-        return generate_samples(stats.beta.rvs, alpha, beta,
-                                dist_shape=self.shape,
-                                size=size)
+        alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size)
+        return generate_samples(
+            stats.beta.rvs, alpha, beta, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -1183,13 +1241,13 @@ def logp(self, value):
 
         logval = tt.log(value)
         log1pval = tt.log1p(-value)
-        logp = (tt.switch(tt.eq(alpha, 1), 0, (alpha - 1) * logval)
-                + tt.switch(tt.eq(beta, 1), 0, (beta - 1) * log1pval)
-                - betaln(alpha, beta))
+        logp = (
+            tt.switch(tt.eq(alpha, 1), 0, (alpha - 1) * logval)
+            + tt.switch(tt.eq(beta, 1), 0, (beta - 1) * log1pval)
+            - betaln(alpha, beta)
+        )
 
-        return bound(logp,
-                     value >= 0, value <= 1,
-                     alpha > 0, beta > 0)
+        return bound(logp, value >= 0, value <= 1, alpha > 0, beta > 0)
 
     def logcdf(self, value):
         value = floatX(tt.as_tensor(value))
@@ -1198,11 +1256,7 @@ def logcdf(self, value):
         return tt.switch(
             tt.le(value, 0),
             -np.inf,
-            tt.switch(
-                tt.ge(value, 1),
-                0,
-                tt.log(incomplete_beta(a, b, value))
-            )
+            tt.switch(tt.ge(value, 1), 0, tt.log(incomplete_beta(a, b, value))),
         )
 
     def _repr_latex_(self, name=None, dist=None):
@@ -1210,13 +1264,14 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         alpha = dist.alpha
         beta = dist.beta
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Beta}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name,
-                                                                get_variable_name(alpha),
-                                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Beta}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format(
+            name, get_variable_name(alpha), get_variable_name(beta)
+        )
+
 
 class Kumaraswamy(UnitContinuous):
-    R"""
+    r"""
     Kumaraswamy log-likelihood.
 
     The pdf of this distribution is
@@ -1263,13 +1318,23 @@ def __init__(self, a, b, *args, **kwargs):
         self.a = a = tt.as_tensor_variable(a)
         self.b = b = tt.as_tensor_variable(b)
 
-        ln_mean = tt.log(b) + tt.gammaln(1 + 1 / a) + tt.gammaln(b) - tt.gammaln(1 + 1 / a + b)
+        ln_mean = (
+            tt.log(b)
+            + tt.gammaln(1 + 1 / a)
+            + tt.gammaln(b)
+            - tt.gammaln(1 + 1 / a + b)
+        )
         self.mean = tt.exp(ln_mean)
-        ln_2nd_raw_moment = tt.log(b) + tt.gammaln(1 + 2 / a) + tt.gammaln(b) - tt.gammaln(1 + 2 / a + b)
+        ln_2nd_raw_moment = (
+            tt.log(b)
+            + tt.gammaln(1 + 2 / a)
+            + tt.gammaln(b)
+            - tt.gammaln(1 + 2 / a + b)
+        )
         self.variance = tt.exp(ln_2nd_raw_moment) - self.mean ** 2
 
-        assert_negative_support(a, 'a', 'Kumaraswamy')
-        assert_negative_support(b, 'b', 'Kumaraswamy')
+        assert_negative_support(a, "a", "Kumaraswamy")
+        assert_negative_support(b, "b", "Kumaraswamy")
 
     def _random(self, a, b, size=None):
         u = np.random.uniform(size=size)
@@ -1292,11 +1357,8 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        a, b = draw_values([self.a, self.b],
-                           point=point, size=size)
-        return generate_samples(self._random, a, b,
-                                dist_shape=self.shape,
-                                size=size)
+        a, b = draw_values([self.a, self.b], point=point, size=size)
+        return generate_samples(self._random, a, b, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         """
@@ -1315,25 +1377,28 @@ def logp(self, value):
         a = self.a
         b = self.b
 
-        logp = tt.log(a) + tt.log(b) + (a - 1) * tt.log(value) + (b - 1) * tt.log(1 - value ** a)
+        logp = (
+            tt.log(a)
+            + tt.log(b)
+            + (a - 1) * tt.log(value)
+            + (b - 1) * tt.log(1 - value ** a)
+        )
 
-        return bound(logp,
-                     value >= 0, value <= 1,
-                     a > 0, b > 0)
+        return bound(logp, value >= 0, value <= 1, a > 0, b > 0)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         a = dist.a
         b = dist.b
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Kumaraswamy}}(\mathit{{a}}={},~\mathit{{b}}={})$'.format(name,
-                                                                                          get_variable_name(a),
-                                                                                          get_variable_name(b))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Kumaraswamy}}(\mathit{{a}}={},~\mathit{{b}}={})$".format(
+            name, get_variable_name(a), get_variable_name(b)
+        )
 
 
 class Exponential(PositiveContinuous):
-    R"""
+    r"""
     Exponential log-likelihood.
 
     The pdf of this distribution is
@@ -1372,13 +1437,13 @@ class Exponential(PositiveContinuous):
     def __init__(self, lam, *args, **kwargs):
         super(Exponential, self).__init__(*args, **kwargs)
         self.lam = lam = tt.as_tensor_variable(lam)
-        self.mean = 1. / self.lam
+        self.mean = 1.0 / self.lam
         self.median = self.mean * tt.log(2)
         self.mode = tt.zeros_like(self.lam)
 
-        self.variance = self.lam**-2
+        self.variance = self.lam ** -2
 
-        assert_negative_support(lam, 'lam', 'Exponential')
+        assert_negative_support(lam, "lam", "Exponential")
 
     def random(self, point=None, size=None):
         """
@@ -1398,9 +1463,9 @@ def random(self, point=None, size=None):
         array
         """
         lam = draw_values([self.lam], point=point, size=size)[0]
-        return generate_samples(np.random.exponential, scale=1. / lam,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            np.random.exponential, scale=1.0 / lam, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -1423,9 +1488,10 @@ def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         lam = dist.lam
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Exponential}}(\mathit{{lam}}={})$'.format(name,
-                                                                get_variable_name(lam))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Exponential}}(\mathit{{lam}}={})$".format(
+            name, get_variable_name(lam)
+        )
 
     def logcdf(self, value):
         """
@@ -1444,15 +1510,13 @@ def logcdf(self, value):
             tt.le(value, 0.0),
             -np.inf,
             tt.switch(
-                tt.le(a, tt.log(2.0)),
-                tt.log(-tt.expm1(-a)),
-                tt.log1p(-tt.exp(-a)),
-            )
+                tt.le(a, tt.log(2.0)), tt.log(-tt.expm1(-a)), tt.log1p(-tt.exp(-a))
+            ),
         )
 
 
 class Laplace(Continuous):
-    R"""
+    r"""
     Laplace log-likelihood.
 
     The pdf of this distribution is
@@ -1498,9 +1562,9 @@ def __init__(self, mu, b, *args, **kwargs):
         self.b = b = tt.as_tensor_variable(b)
         self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu)
 
-        self.variance = 2 * self.b**2
+        self.variance = 2 * self.b ** 2
 
-        assert_negative_support(b, 'b', 'Laplace')
+        assert_negative_support(b, "b", "Laplace")
 
     def random(self, point=None, size=None):
         """
@@ -1520,9 +1584,9 @@ def random(self, point=None, size=None):
         array
         """
         mu, b = draw_values([self.mu, self.b], point=point, size=size)
-        return generate_samples(np.random.laplace, mu, b,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            np.random.laplace, mu, b, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -1548,10 +1612,10 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         b = dist.b
         mu = dist.mu
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Laplace}}(\mathit{{mu}}={},~\mathit{{b}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(b))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Laplace}}(\mathit{{mu}}={},~\mathit{{b}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(b)
+        )
 
     def logcdf(self, value):
         a = self.mu
@@ -1561,15 +1625,13 @@ def logcdf(self, value):
             tt.le(value, a),
             tt.log(0.5) + y,
             tt.switch(
-                tt.gt(y, 1),
-                tt.log1p(-0.5 * tt.exp(-y)),
-                tt.log(1 - 0.5 * tt.exp(-y))
-            )
+                tt.gt(y, 1), tt.log1p(-0.5 * tt.exp(-y)), tt.log(1 - 0.5 * tt.exp(-y))
+            ),
         )
 
 
 class Lognormal(PositiveContinuous):
-    R"""
+    r"""
     Log-normal log-likelihood.
 
     Distribution of any random variable whose logarithm is normally
@@ -1637,17 +1699,19 @@ def __init__(self, mu=0, sd=None, tau=None, *args, **kwargs):
         self.tau = tau = tt.as_tensor_variable(tau)
         self.sd = sd = tt.as_tensor_variable(sd)
 
-        self.mean = tt.exp(self.mu + 1. / (2 * self.tau))
+        self.mean = tt.exp(self.mu + 1.0 / (2 * self.tau))
         self.median = tt.exp(self.mu)
-        self.mode = tt.exp(self.mu - 1. / self.tau)
-        self.variance = (tt.exp(1. / self.tau) - 1) * tt.exp(2 * self.mu + 1. / self.tau)
+        self.mode = tt.exp(self.mu - 1.0 / self.tau)
+        self.variance = (tt.exp(1.0 / self.tau) - 1) * tt.exp(
+            2 * self.mu + 1.0 / self.tau
+        )
 
-        assert_negative_support(tau, 'tau', 'Lognormal')
-        assert_negative_support(sd, 'sd', 'Lognormal')
+        assert_negative_support(tau, "tau", "Lognormal")
+        assert_negative_support(sd, "sd", "Lognormal")
 
     def _random(self, mu, tau, size=None):
         samples = np.random.normal(size=size)
-        return np.exp(mu + (tau**-0.5) * samples)
+        return np.exp(mu + (tau ** -0.5) * samples)
 
     def random(self, point=None, size=None):
         """
@@ -1667,9 +1731,7 @@ def random(self, point=None, size=None):
         array
         """
         mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
-        return generate_samples(self._random, mu, tau,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(self._random, mu, tau, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         """
@@ -1687,20 +1749,22 @@ def logp(self, value):
         """
         mu = self.mu
         tau = self.tau
-        return bound(-0.5 * tau * (tt.log(value) - mu)**2
-                     + 0.5 * tt.log(tau / (2. * np.pi))
-                     - tt.log(value),
-                     tau > 0)
+        return bound(
+            -0.5 * tau * (tt.log(value) - mu) ** 2
+            + 0.5 * tt.log(tau / (2.0 * np.pi))
+            - tt.log(value),
+            tau > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         tau = dist.tau
         mu = dist.mu
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Lognormal}}(\mathit{{mu}}={},~\mathit{{tau}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(tau))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Lognormal}}(\mathit{{mu}}={},~\mathit{{tau}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(tau)
+        )
 
     def logcdf(self, value):
         mu = self.mu
@@ -1712,15 +1776,14 @@ def logcdf(self, value):
             -np.inf,
             tt.switch(
                 tt.lt(z, -1.0),
-                tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) -
-                tt.sqr(z) / 2,
-                tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
-            )
+                tt.log(tt.erfcx(-z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2,
+                tt.log1p(-tt.erfc(z / tt.sqrt(2.0)) / 2.0),
+            ),
         )
 
 
 class StudentT(Continuous):
-    R"""
+    r"""
     Student's T log-likelihood.
 
     Describes a normal variable whose precision is gamma distributed.
@@ -1790,12 +1853,12 @@ def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs):
         self.sd = sd = tt.as_tensor_variable(sd)
         self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu)
 
-        self.variance = tt.switch((nu > 2) * 1,
-                                  (1 / self.lam) * (nu / (nu - 2)),
-                                  np.inf)
+        self.variance = tt.switch(
+            (nu > 2) * 1, (1 / self.lam) * (nu / (nu - 2)), np.inf
+        )
 
-        assert_negative_support(lam, 'lam (sd)', 'StudentT')
-        assert_negative_support(nu, 'nu', 'StudentT')
+        assert_negative_support(lam, "lam (sd)", "StudentT")
+        assert_negative_support(nu, "nu", "StudentT")
 
     def random(self, point=None, size=None):
         """
@@ -1814,11 +1877,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        nu, mu, lam = draw_values([self.nu, self.mu, self.lam],
-                                  point=point, size=size)
-        return generate_samples(stats.t.rvs, nu, loc=mu, scale=lam**-0.5,
-                                dist_shape=self.shape,
-                                size=size)
+        nu, mu, lam = draw_values([self.nu, self.mu, self.lam], point=point, size=size)
+        return generate_samples(
+            stats.t.rvs, nu, loc=mu, scale=lam ** -0.5, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -1839,11 +1901,15 @@ def logp(self, value):
         lam = self.lam
         sd = self.sd
 
-        return bound(gammaln((nu + 1.0) / 2.0)
-                     + .5 * tt.log(lam / (nu * np.pi))
-                     - gammaln(nu / 2.0)
-                     - (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu)**2 / nu),
-                     lam > 0, nu > 0, sd > 0)
+        return bound(
+            gammaln((nu + 1.0) / 2.0)
+            + 0.5 * tt.log(lam / (nu * np.pi))
+            - gammaln(nu / 2.0)
+            - (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu) ** 2 / nu),
+            lam > 0,
+            nu > 0,
+            sd > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -1851,24 +1917,23 @@ def _repr_latex_(self, name=None, dist=None):
         nu = dist.nu
         mu = dist.mu
         lam = dist.lam
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{StudentT}}(\mathit{{nu}}={},~\mathit{{mu}}={},~\mathit{{lam}}={})$'.format(name,
-                                                                get_variable_name(nu),
-                                                                get_variable_name(mu),
-                                                                get_variable_name(lam))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{StudentT}}(\mathit{{nu}}={},~\mathit{{mu}}={},~\mathit{{lam}}={})$".format(
+            name, get_variable_name(nu), get_variable_name(mu), get_variable_name(lam)
+        )
 
     def logcdf(self, value):
         nu = self.nu
         mu = self.mu
         sd = self.sd
-        t = (value - mu)/sd
-        sqrt_t2_nu = tt.sqrt(t**2 + nu)
-        x = (t + sqrt_t2_nu)/(2.0 * sqrt_t2_nu)
-        return tt.log(incomplete_beta(nu/2., nu/2., x))
+        t = (value - mu) / sd
+        sqrt_t2_nu = tt.sqrt(t ** 2 + nu)
+        x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu)
+        return tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x))
 
 
 class Pareto(Continuous):
-    R"""
+    r"""
     Pareto log-likelihood.
 
     Often used to characterize wealth distribution, or other examples of the
@@ -1912,28 +1977,28 @@ class Pareto(Continuous):
         Scale parameter (m > 0).
     """
 
-    def __init__(self, alpha, m, transform='lowerbound', *args, **kwargs):
+    def __init__(self, alpha, m, transform="lowerbound", *args, **kwargs):
         self.alpha = alpha = tt.as_tensor_variable(alpha)
         self.m = m = tt.as_tensor_variable(m)
 
-        self.mean = tt.switch(tt.gt(alpha, 1), alpha *
-                              m / (alpha - 1.), np.inf)
-        self.median = m * 2.**(1. / alpha)
+        self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.0), np.inf)
+        self.median = m * 2.0 ** (1.0 / alpha)
         self.variance = tt.switch(
             tt.gt(alpha, 2),
-            (alpha * m**2) / ((alpha - 2.) * (alpha - 1.)**2),
-            np.inf)
+            (alpha * m ** 2) / ((alpha - 2.0) * (alpha - 1.0) ** 2),
+            np.inf,
+        )
 
-        assert_negative_support(alpha, 'alpha', 'Pareto')
-        assert_negative_support(m, 'm', 'Pareto')
+        assert_negative_support(alpha, "alpha", "Pareto")
+        assert_negative_support(m, "m", "Pareto")
 
-        if transform == 'lowerbound':
+        if transform == "lowerbound":
             transform = transforms.lowerbound(self.m)
         super(Pareto, self).__init__(transform=transform, *args, **kwargs)
 
     def _random(self, alpha, m, size=None):
         u = np.random.uniform(size=size)
-        return m * (1. - u)**(-1. / alpha)
+        return m * (1.0 - u) ** (-1.0 / alpha)
 
     def random(self, point=None, size=None):
         """
@@ -1952,11 +2017,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        alpha, m = draw_values([self.alpha, self.m],
-                               point=point, size=size)
-        return generate_samples(self._random, alpha, m,
-                                dist_shape=self.shape,
-                                size=size)
+        alpha, m = draw_values([self.alpha, self.m], point=point, size=size)
+        return generate_samples(
+            self._random, alpha, m, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -1974,19 +2038,22 @@ def logp(self, value):
         """
         alpha = self.alpha
         m = self.m
-        return bound(tt.log(alpha) + logpow(m, alpha)
-                     - logpow(value, alpha + 1),
-                     value >= m, alpha > 0, m > 0)
+        return bound(
+            tt.log(alpha) + logpow(m, alpha) - logpow(value, alpha + 1),
+            value >= m,
+            alpha > 0,
+            m > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         alpha = dist.alpha
         m = dist.m
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Pareto}}(\mathit{{alpha}}={},~\mathit{{m}}={})$'.format(name,
-                                                                get_variable_name(alpha),
-                                                                get_variable_name(m))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Pareto}}(\mathit{{alpha}}={},~\mathit{{m}}={})$".format(
+            name, get_variable_name(alpha), get_variable_name(m)
+        )
 
     def logcdf(self, value):
         m = self.m
@@ -1995,16 +2062,12 @@ def logcdf(self, value):
         return tt.switch(
             tt.lt(value, m),
             -np.inf,
-            tt.switch(
-                tt.le(arg, 1e-5),
-                tt.log1p(-arg),
-                tt.log(1 - arg)
-            )
+            tt.switch(tt.le(arg, 1e-5), tt.log1p(-arg), tt.log(1 - arg)),
         )
 
 
 class Cauchy(Continuous):
-    R"""
+    r"""
     Cauchy log-likelihood.
 
     Also known as the Lorentz or the Breit-Wigner distribution.
@@ -2053,7 +2116,7 @@ def __init__(self, alpha, beta, *args, **kwargs):
         self.median = self.mode = self.alpha = tt.as_tensor_variable(alpha)
         self.beta = tt.as_tensor_variable(beta)
 
-        assert_negative_support(beta, 'beta', 'Cauchy')
+        assert_negative_support(beta, "beta", "Cauchy")
 
     def _random(self, alpha, beta, size=None):
         u = np.random.uniform(size=size)
@@ -2076,11 +2139,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        alpha, beta = draw_values([self.alpha, self.beta],
-                                  point=point, size=size)
-        return generate_samples(self._random, alpha, beta,
-                                dist_shape=self.shape,
-                                size=size)
+        alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size)
+        return generate_samples(
+            self._random, alpha, beta, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -2098,28 +2160,27 @@ def logp(self, value):
         """
         alpha = self.alpha
         beta = self.beta
-        return bound(- tt.log(np.pi) - tt.log(beta)
-                     - tt.log1p(((value - alpha) / beta)**2),
-                     beta > 0)
+        return bound(
+            -tt.log(np.pi) - tt.log(beta) - tt.log1p(((value - alpha) / beta) ** 2),
+            beta > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         alpha = dist.alpha
         beta = dist.beta
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Cauchy}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name,
-                                                                get_variable_name(alpha),
-                                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Cauchy}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format(
+            name, get_variable_name(alpha), get_variable_name(beta)
+        )
 
     def logcdf(self, value):
-        return tt.log(
-            0.5 + tt.arctan((value - self.alpha) / self.beta) / np.pi
-        )
+        return tt.log(0.5 + tt.arctan((value - self.alpha) / self.beta) / np.pi)
 
 
 class HalfCauchy(PositiveContinuous):
-    R"""
+    r"""
     Half-Cauchy log-likelihood.
 
     The pdf of this distribution is
@@ -2162,7 +2223,7 @@ def __init__(self, beta, *args, **kwargs):
         self.median = tt.as_tensor_variable(beta)
         self.beta = tt.as_tensor_variable(beta)
 
-        assert_negative_support(beta, 'beta', 'HalfCauchy')
+        assert_negative_support(beta, "beta", "HalfCauchy")
 
     def _random(self, beta, size=None):
         u = np.random.uniform(size=size)
@@ -2186,9 +2247,7 @@ def random(self, point=None, size=None):
         array
         """
         beta = draw_values([self.beta], point=point, size=size)[0]
-        return generate_samples(self._random, beta,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(self._random, beta, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         """
@@ -2205,29 +2264,29 @@ def logp(self, value):
         TensorVariable
         """
         beta = self.beta
-        return bound(tt.log(2) - tt.log(np.pi) - tt.log(beta)
-                     - tt.log1p((value / beta)**2),
-                     value >= 0, beta > 0)
+        return bound(
+            tt.log(2) - tt.log(np.pi) - tt.log(beta) - tt.log1p((value / beta) ** 2),
+            value >= 0,
+            beta > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         beta = dist.beta
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{HalfCauchy}}(\mathit{{beta}}={})$'.format(name,
-                                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{HalfCauchy}}(\mathit{{beta}}={})$".format(
+            name, get_variable_name(beta)
+        )
 
     def logcdf(self, value):
         return tt.switch(
-            tt.le(value, 0),
-            -np.inf,
-            tt.log(
-                2 * tt.arctan(value / self.beta) / np.pi
-            ))
+            tt.le(value, 0), -np.inf, tt.log(2 * tt.arctan(value / self.beta) / np.pi)
+        )
 
 
 class Gamma(PositiveContinuous):
-    R"""
+    r"""
     Gamma log-likelihood.
 
     Represents the sum of alpha exponentially distributed random variables,
@@ -2284,29 +2343,30 @@ class Gamma(PositiveContinuous):
         Alternative scale parameter (sd > 0).
     """
 
-    def __init__(self, alpha=None, beta=None, mu=None, sd=None,
-                 *args, **kwargs):
+    def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs):
         super(Gamma, self).__init__(*args, **kwargs)
         alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd)
         self.alpha = alpha = tt.as_tensor_variable(alpha)
         self.beta = beta = tt.as_tensor_variable(beta)
         self.mean = alpha / beta
         self.mode = tt.maximum((alpha - 1) / beta, 0)
-        self.variance = alpha / beta**2
+        self.variance = alpha / beta ** 2
 
-        assert_negative_support(alpha, 'alpha', 'Gamma')
-        assert_negative_support(beta, 'beta', 'Gamma')
+        assert_negative_support(alpha, "alpha", "Gamma")
+        assert_negative_support(beta, "beta", "Gamma")
 
     def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None):
         if (alpha is not None) and (beta is not None):
             pass
         elif (mu is not None) and (sd is not None):
-            alpha = mu**2 / sd**2
-            beta = mu / sd**2
+            alpha = mu ** 2 / sd ** 2
+            beta = mu / sd ** 2
         else:
-            raise ValueError('Incompatible parameterization. Either use '
-                             'alpha and beta, or mu and sd to specify '
-                             'distribution.')
+            raise ValueError(
+                "Incompatible parameterization. Either use "
+                "alpha and beta, or mu and sd to specify "
+                "distribution."
+            )
 
         return alpha, beta
 
@@ -2327,11 +2387,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        alpha, beta = draw_values([self.alpha, self.beta],
-                                  point=point, size=size)
-        return generate_samples(stats.gamma.rvs, alpha, scale=1. / beta,
-                                dist_shape=self.shape,
-                                size=size)
+        alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size)
+        return generate_samples(
+            stats.gamma.rvs, alpha, scale=1.0 / beta, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -2350,25 +2409,28 @@ def logp(self, value):
         alpha = self.alpha
         beta = self.beta
         return bound(
-            -gammaln(alpha) + logpow(
-                beta, alpha) - beta * value + logpow(value, alpha - 1),
+            -gammaln(alpha)
+            + logpow(beta, alpha)
+            - beta * value
+            + logpow(value, alpha - 1),
             value >= 0,
             alpha > 0,
-            beta > 0)
+            beta > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         beta = dist.beta
         alpha = dist.alpha
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Gamma}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name,
-                                                                get_variable_name(alpha),
-                                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Gamma}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format(
+            name, get_variable_name(alpha), get_variable_name(beta)
+        )
 
 
 class InverseGamma(PositiveContinuous):
-    R"""
+    r"""
     Inverse gamma log-likelihood, the reciprocal of the gamma distribution.
 
     The pdf of this distribution is
@@ -2416,22 +2478,22 @@ class InverseGamma(PositiveContinuous):
     """
 
     def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs):
-        super(InverseGamma, self).__init__(*args, defaults=('mode',), **kwargs)
+        super(InverseGamma, self).__init__(*args, defaults=("mode",), **kwargs)
 
         alpha, beta = InverseGamma._get_alpha_beta(alpha, beta, mu, sd)
         self.alpha = alpha = tt.as_tensor_variable(alpha)
         self.beta = beta = tt.as_tensor_variable(beta)
 
         self.mean = self._calculate_mean()
-        self.mode = beta / (alpha + 1.)
-        self.variance = tt.switch(tt.gt(alpha, 2),
-                                  (beta**2) / ((alpha - 2) * (alpha - 1.)**2),
-                                  np.inf)
-        assert_negative_support(alpha, 'alpha', 'InverseGamma')
-        assert_negative_support(beta, 'beta', 'InverseGamma')
+        self.mode = beta / (alpha + 1.0)
+        self.variance = tt.switch(
+            tt.gt(alpha, 2), (beta ** 2) / ((alpha - 2) * (alpha - 1.0) ** 2), np.inf
+        )
+        assert_negative_support(alpha, "alpha", "InverseGamma")
+        assert_negative_support(beta, "beta", "InverseGamma")
 
     def _calculate_mean(self):
-        m = self.beta / (self.alpha - 1.)
+        m = self.beta / (self.alpha - 1.0)
         try:
             return (self.alpha > 1) * m or np.inf
         except ValueError:  # alpha is an array
@@ -2440,18 +2502,20 @@ def _calculate_mean(self):
 
     @staticmethod
     def _get_alpha_beta(alpha, beta, mu, sd):
-        if (alpha is not None):
-            if (beta is not None):
+        if alpha is not None:
+            if beta is not None:
                 pass
             else:
                 beta = 1
         elif (mu is not None) and (sd is not None):
-            alpha = (2 * sd**2 + mu**2)/sd**2
-            beta = mu * (mu**2 + sd**2) / sd**2
+            alpha = (2 * sd ** 2 + mu ** 2) / sd ** 2
+            beta = mu * (mu ** 2 + sd ** 2) / sd ** 2
         else:
-            raise ValueError('Incompatible parameterization. Either use '
-                             'alpha and (optionally) beta, or mu and sd to specify '
-                             'distribution.')
+            raise ValueError(
+                "Incompatible parameterization. Either use "
+                "alpha and (optionally) beta, or mu and sd to specify "
+                "distribution."
+            )
 
         return alpha, beta
 
@@ -2472,11 +2536,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        alpha, beta = draw_values([self.alpha, self.beta],
-                                  point=point, size=size)
-        return generate_samples(stats.invgamma.rvs, a=alpha, scale=beta,
-                                dist_shape=self.shape,
-                                size=size)
+        alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size)
+        return generate_samples(
+            stats.invgamma.rvs, a=alpha, scale=beta, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -2494,23 +2557,29 @@ def logp(self, value):
         """
         alpha = self.alpha
         beta = self.beta
-        return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value
-                     + logpow(value, -alpha - 1),
-                     value > 0, alpha > 0, beta > 0)
+        return bound(
+            logpow(beta, alpha)
+            - gammaln(alpha)
+            - beta / value
+            + logpow(value, -alpha - 1),
+            value > 0,
+            alpha > 0,
+            beta > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         beta = dist.beta
         alpha = dist.alpha
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{InverseGamma}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name,
-                                                                get_variable_name(alpha),
-                                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{InverseGamma}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format(
+            name, get_variable_name(alpha), get_variable_name(beta)
+        )
 
 
 class ChiSquared(Gamma):
-    R"""
+    r"""
     :math:`\chi^2` log-likelihood.
 
     The pdf of this distribution is
@@ -2549,20 +2618,18 @@ class ChiSquared(Gamma):
 
     def __init__(self, nu, *args, **kwargs):
         self.nu = nu = tt.as_tensor_variable(nu)
-        super(ChiSquared, self).__init__(alpha=nu / 2., beta=0.5,
-                                         *args, **kwargs)
+        super(ChiSquared, self).__init__(alpha=nu / 2.0, beta=0.5, *args, **kwargs)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         nu = dist.nu
-        name = r'\text{%s}' % name
-        return r'${} \sim \Chi^2(\mathit{{nu}}={})$'.format(name,
-                                                            get_variable_name(nu))
+        name = r"\text{%s}" % name
+        return r"${} \sim \chi^2(\mathit{{nu}}={})$".format(name, get_variable_name(nu))
 
 
 class Weibull(PositiveContinuous):
-    R"""
+    r"""
     Weibull log-likelihood.
 
     The pdf of this distribution is
@@ -2609,16 +2676,15 @@ def __init__(self, alpha, beta, *args, **kwargs):
         super(Weibull, self).__init__(*args, **kwargs)
         self.alpha = alpha = tt.as_tensor_variable(alpha)
         self.beta = beta = tt.as_tensor_variable(beta)
-        self.mean = beta * tt.exp(gammaln(1 + 1. / alpha))
-        self.median = beta * tt.exp(gammaln(tt.log(2)))**(1. / alpha)
-        self.variance = (beta**2) * \
-            tt.exp(gammaln(1 + 2. / alpha - self.mean**2))
-        self.mode = tt.switch(alpha >= 1,
-                              beta * ((alpha - 1)/alpha) ** (1 / alpha),
-                              0)  # Reference: https://en.wikipedia.org/wiki/Weibull_distribution
+        self.mean = beta * tt.exp(gammaln(1 + 1.0 / alpha))
+        self.median = beta * tt.log(2) ** (1.0 / alpha)  # beta * ln(2)^(1/alpha)
+        self.variance = beta ** 2 * tt.exp(gammaln(1 + 2.0 / alpha)) - self.mean ** 2
+        self.mode = tt.switch(
+            alpha >= 1, beta * ((alpha - 1) / alpha) ** (1 / alpha), 0
+        )  # Reference: https://en.wikipedia.org/wiki/Weibull_distribution
 
-        assert_negative_support(alpha, 'alpha', 'Weibull')
-        assert_negative_support(beta, 'beta', 'Weibull')
+        assert_negative_support(alpha, "alpha", "Weibull")
+        assert_negative_support(beta, "beta", "Weibull")
 
     def random(self, point=None, size=None):
         """
@@ -2637,15 +2703,12 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        alpha, beta = draw_values([self.alpha, self.beta],
-                                  point=point, size=size)
+        alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size)
 
         def _random(a, b, size=None):
-            return b * (-np.log(np.random.uniform(size=size)))**(1 / a)
+            return b * (-np.log(np.random.uniform(size=size))) ** (1 / a)
 
-        return generate_samples(_random, alpha, beta,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(_random, alpha, beta, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         """
@@ -2663,23 +2726,28 @@ def logp(self, value):
         """
         alpha = self.alpha
         beta = self.beta
-        return bound(tt.log(alpha) - tt.log(beta)
-                     + (alpha - 1) * tt.log(value / beta)
-                     - (value / beta)**alpha,
-                     value >= 0, alpha > 0, beta > 0)
+        return bound(
+            tt.log(alpha)
+            - tt.log(beta)
+            + (alpha - 1) * tt.log(value / beta)
+            - (value / beta) ** alpha,
+            value >= 0,
+            alpha > 0,
+            beta > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         beta = dist.beta
         alpha = dist.alpha
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Weibull}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name,
-                                                                get_variable_name(alpha),
-                                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Weibull}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format(
+            name, get_variable_name(alpha), get_variable_name(beta)
+        )
 
     def logcdf(self, value):
-        '''
+        """
         Compute the log CDF for the Weibull distribution
 
         References
@@ -2687,22 +2755,21 @@ def logcdf(self, value):
         .. [Machler2012] Martin Mächler (2012).
             "Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr
             package"
-        '''
+        """
         alpha = self.alpha
         beta = self.beta
-        a = (value / beta)**alpha
+        a = (value / beta) ** alpha
         return tt.switch(
             tt.le(value, 0.0),
             -np.inf,
             tt.switch(
-                tt.le(a, tt.log(2.0)),
-                tt.log(-tt.expm1(-a)),
-                tt.log1p(-tt.exp(-a)))
+                tt.le(a, tt.log(2.0)), tt.log(-tt.expm1(-a)), tt.log1p(-tt.exp(-a))
+            ),
         )
 
 
 class HalfStudentT(PositiveContinuous):
-    R"""
+    r"""
     Half Student's T log-likelihood
 
     The pdf of this distribution is
@@ -2767,9 +2834,9 @@ def __init__(self, nu=1, sd=None, lam=None, *args, **kwargs):
         self.lam = tt.as_tensor_variable(lam)
         self.nu = nu = tt.as_tensor_variable(nu)
 
-        assert_negative_support(sd, 'sd', 'HalfStudentT')
-        assert_negative_support(lam, 'lam', 'HalfStudentT')
-        assert_negative_support(nu, 'nu', 'HalfStudentT')
+        assert_negative_support(sd, "sd", "HalfStudentT")
+        assert_negative_support(lam, "lam", "HalfStudentT")
+        assert_negative_support(nu, "nu", "HalfStudentT")
 
     def random(self, point=None, size=None):
         """
@@ -2789,9 +2856,11 @@ def random(self, point=None, size=None):
         array
         """
         nu, sd = draw_values([self.nu, self.sd], point=point, size=size)
-        return np.abs(generate_samples(stats.t.rvs, nu, loc=0, scale=sd,
-                                       dist_shape=self.shape,
-                                       size=size))
+        return np.abs(
+            generate_samples(
+                stats.t.rvs, nu, loc=0, scale=sd, dist_shape=self.shape, size=size
+            )
+        )
 
     def logp(self, value):
         """
@@ -2811,25 +2880,31 @@ def logp(self, value):
         sd = self.sd
         lam = self.lam
 
-        return bound(tt.log(2) + gammaln((nu + 1.0) / 2.0)
-                     - gammaln(nu / 2.0)
-                     - .5 * tt.log(nu * np.pi * sd**2)
-                     - (nu + 1.0) / 2.0 * tt.log1p(value ** 2 / (nu * sd**2)),
-                     sd > 0, lam > 0, nu > 0, value >= 0)
+        return bound(
+            tt.log(2)
+            + gammaln((nu + 1.0) / 2.0)
+            - gammaln(nu / 2.0)
+            - 0.5 * tt.log(nu * np.pi * sd ** 2)
+            - (nu + 1.0) / 2.0 * tt.log1p(value ** 2 / (nu * sd ** 2)),
+            sd > 0,
+            lam > 0,
+            nu > 0,
+            value >= 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         nu = dist.nu
         sd = dist.sd
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{HalfStudentT}}(\mathit{{nu}}={},~\mathit{{sd}}={})$'.format(name,
-                                                                get_variable_name(nu),
-                                                                get_variable_name(sd))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{HalfStudentT}}(\mathit{{nu}}={},~\mathit{{sd}}={})$".format(
+            name, get_variable_name(nu), get_variable_name(sd)
+        )
 
 
 class ExGaussian(Continuous):
-    R"""
+    r"""
     Exponentially modified Gaussian log-likelihood.
 
     Results from the convolution of a normal distribution with an exponential
@@ -2899,10 +2974,10 @@ def __init__(self, mu, sigma, nu, *args, **kwargs):
         self.sigma = sigma = tt.as_tensor_variable(sigma)
         self.nu = nu = tt.as_tensor_variable(nu)
         self.mean = mu + nu
-        self.variance = (sigma**2) + (nu**2)
+        self.variance = (sigma ** 2) + (nu ** 2)
 
-        assert_negative_support(sigma, 'sigma', 'ExGaussian')
-        assert_negative_support(nu, 'nu', 'ExGaussian')
+        assert_negative_support(sigma, "sigma", "ExGaussian")
+        assert_negative_support(nu, "nu", "ExGaussian")
 
     def random(self, point=None, size=None):
         """
@@ -2921,16 +2996,18 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu],
-                                    point=point, size=size)
+        mu, sigma, nu = draw_values(
+            [self.mu, self.sigma, self.nu], point=point, size=size
+        )
 
         def _random(mu, sigma, nu, size=None):
-            return (np.random.normal(mu, sigma, size=size)
-                    + np.random.exponential(scale=nu, size=size))
+            return np.random.normal(mu, sigma, size=size) + np.random.exponential(
+                scale=nu, size=size
+            )
 
-        return generate_samples(_random, mu, sigma, nu,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            _random, mu, sigma, nu, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -2951,12 +3028,15 @@ def logp(self, value):
         nu = self.nu
 
         # This condition suggested by exGAUS.R from gamlss
-        lp = tt.switch(tt.gt(nu,  0.05 * sigma),
-                       - tt.log(nu) + (mu - value) / nu + 0.5 * (sigma / nu)**2
-                       + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.),
-                       - tt.log(sigma * tt.sqrt(2 * np.pi))
-                       - 0.5 * ((value - mu) / sigma)**2)
-        return bound(lp, sigma > 0., nu > 0.)
+        lp = tt.switch(
+            tt.gt(nu, 0.05 * sigma),
+            -tt.log(nu)
+            + (mu - value) / nu
+            + 0.5 * (sigma / nu) ** 2
+            + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.0),
+            -tt.log(sigma * tt.sqrt(2 * np.pi)) - 0.5 * ((value - mu) / sigma) ** 2,
+        )
+        return bound(lp, sigma > 0.0, nu > 0.0)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -2964,11 +3044,10 @@ def _repr_latex_(self, name=None, dist=None):
         sigma = dist.sigma
         mu = dist.mu
         nu = dist.nu
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{ExGaussian}}(\mathit{{mu}}={},~\mathit{{sigma}}={},~\mathit{{nu}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(sigma),
-                                                                get_variable_name(nu))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{ExGaussian}}(\mathit{{mu}}={},~\mathit{{sigma}}={},~\mathit{{nu}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(sigma), get_variable_name(nu)
+        )
 
     def logcdf(self, value):
         """
@@ -2982,21 +3061,29 @@ def logcdf(self, value):
         """
         mu = self.mu
         sigma = self.sigma
-        sigma_2 = sigma**2
+        sigma_2 = sigma ** 2
         nu = self.nu
-        z = value - mu - sigma_2/nu
+        z = value - mu - sigma_2 / nu
         return tt.switch(
             tt.gt(nu, 0.05 * sigma),
-            tt.log(std_cdf((value - mu)/sigma) -
-                   std_cdf(z/sigma) * tt.exp(
-                       ((mu + (sigma_2/nu))**2 -
-                        (mu**2) -
-                        2 * value * ((sigma_2)/nu))/(2 * sigma_2))),
-            normal_lcdf(mu, sigma, value))
+            tt.log(
+                std_cdf((value - mu) / sigma)
+                - std_cdf(z / sigma)
+                * tt.exp(
+                    (
+                        (mu + (sigma_2 / nu)) ** 2
+                        - (mu ** 2)
+                        - 2 * value * ((sigma_2) / nu)
+                    )
+                    / (2 * sigma_2)
+                )
+            ),
+            normal_lcdf(mu, sigma, value),
+        )
 
 
 class VonMises(Continuous):
-    R"""
+    r"""
     Univariate VonMises log-likelihood.
 
     The pdf of this distribution is
@@ -3039,15 +3126,14 @@ class VonMises(Continuous):
         Concentration (\frac{1}{kappa} is analogous to \sigma^2).
     """
 
-    def __init__(self, mu=0.0, kappa=None, transform='circular',
-                 *args, **kwargs):
-        if transform == 'circular':
+    def __init__(self, mu=0.0, kappa=None, transform="circular", *args, **kwargs):
+        if transform == "circular":
             transform = transforms.Circular()
         super(VonMises, self).__init__(transform=transform, *args, **kwargs)
         self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu)
         self.kappa = kappa = floatX(tt.as_tensor_variable(kappa))
 
-        assert_negative_support(kappa, 'kappa', 'VonMises')
+        assert_negative_support(kappa, "kappa", "VonMises")
 
     def random(self, point=None, size=None):
         """
@@ -3066,11 +3152,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        mu, kappa = draw_values([self.mu, self.kappa],
-                                point=point, size=size)
-        return generate_samples(stats.vonmises.rvs, loc=mu, kappa=kappa,
-                                dist_shape=self.shape,
-                                size=size)
+        mu, kappa = draw_values([self.mu, self.kappa], point=point, size=size)
+        return generate_samples(
+            stats.vonmises.rvs, loc=mu, kappa=kappa, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -3088,23 +3173,26 @@ def logp(self, value):
         """
         mu = self.mu
         kappa = self.kappa
-        return bound(kappa * tt.cos(mu - value) - (tt.log(2 * np.pi) + log_i0(kappa)),
-                     kappa > 0, value >= -np.pi, value <= np.pi)
+        return bound(
+            kappa * tt.cos(mu - value) - (tt.log(2 * np.pi) + log_i0(kappa)),
+            kappa > 0,
+            value >= -np.pi,
+            value <= np.pi,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         kappa = dist.kappa
         mu = dist.mu
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{VonMises}}(\mathit{{mu}}={},~\mathit{{kappa}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(kappa))
-
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{VonMises}}(\mathit{{mu}}={},~\mathit{{kappa}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(kappa)
+        )
 
 
 class SkewNormal(Continuous):
-    R"""
+    r"""
     Univariate skew-normal log-likelihood.
 
      The pdf of this distribution is
@@ -3170,11 +3258,13 @@ def __init__(self, mu=0.0, sd=None, tau=None, alpha=1, *args, **kwargs):
 
         self.alpha = alpha = tt.as_tensor_variable(alpha)
 
-        self.mean = mu + self.sd * (2 / np.pi)**0.5 * alpha / (1 + alpha**2)**0.5
-        self.variance = self.sd**2 * (1 - (2 * alpha**2) / ((1 + alpha**2) * np.pi))
+        self.mean = mu + self.sd * (2 / np.pi) ** 0.5 * alpha / (1 + alpha ** 2) ** 0.5
+        self.variance = self.sd ** 2 * (
+            1 - (2 * alpha ** 2) / ((1 + alpha ** 2) * np.pi)
+        )
 
-        assert_negative_support(tau, 'tau', 'SkewNormal')
-        assert_negative_support(sd, 'sd', 'SkewNormal')
+        assert_negative_support(tau, "tau", "SkewNormal")
+        assert_negative_support(sd, "sd", "SkewNormal")
 
     def random(self, point=None, size=None):
         """
@@ -3194,11 +3284,16 @@ def random(self, point=None, size=None):
         array
         """
         mu, tau, _, alpha = draw_values(
-            [self.mu, self.tau, self.sd, self.alpha], point=point, size=size)
-        return generate_samples(stats.skewnorm.rvs,
-                                a=alpha, loc=mu, scale=tau**-0.5,
-                                dist_shape=self.shape,
-                                size=size)
+            [self.mu, self.tau, self.sd, self.alpha], point=point, size=size
+        )
+        return generate_samples(
+            stats.skewnorm.rvs,
+            a=alpha,
+            loc=mu,
+            scale=tau ** -0.5,
+            dist_shape=self.shape,
+            size=size,
+        )
 
     def logp(self, value):
         """
@@ -3219,11 +3314,11 @@ def logp(self, value):
         mu = self.mu
         alpha = self.alpha
         return bound(
-            tt.log(1 +
-                   tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2)))
-            + (-tau * (value - mu)**2
-               + tt.log(tau / np.pi / 2.)) / 2.,
-            tau > 0, sd > 0)
+            tt.log(1 + tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2)))
+            + (-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0,
+            tau > 0,
+            sd > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -3231,15 +3326,14 @@ def _repr_latex_(self, name=None, dist=None):
         sd = dist.sd
         mu = dist.mu
         alpha = dist.alpha
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Skew-Normal}}(\mathit{{mu}}={},~\mathit{{sd}}={},~\mathit{{alpha}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(sd),
-                                                                get_variable_name(alpha))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Skew-Normal}}(\mathit{{mu}}={},~\mathit{{sd}}={},~\mathit{{alpha}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(sd), get_variable_name(alpha)
+        )
 
 
 class Triangular(BoundedContinuous):
-    R"""
+    r"""
     Continuous Triangular log-likelihood
 
     The pdf of this distribution is
@@ -3290,14 +3384,12 @@ class Triangular(BoundedContinuous):
         Upper limit.
     """
 
-    def __init__(self, lower=0, upper=1, c=0.5,
-                 *args, **kwargs):
+    def __init__(self, lower=0, upper=1, c=0.5, *args, **kwargs):
         self.median = self.mean = self.c = c = tt.as_tensor_variable(c)
         self.lower = lower = tt.as_tensor_variable(lower)
         self.upper = upper = tt.as_tensor_variable(upper)
 
-        super(Triangular, self).__init__(lower=lower, upper=upper,
-                                         *args, **kwargs)
+        super(Triangular, self).__init__(lower=lower, upper=upper, *args, **kwargs)
 
     def random(self, point=None, size=None):
         """
@@ -3316,10 +3408,18 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        c, lower, upper = draw_values([self.c, self.lower, self.upper],
-                                      point=point, size=size)
-        return generate_samples(stats.triang.rvs, c=c-lower, loc=lower, scale=upper-lower,
-                                size=size, dist_shape=self.shape, random_state=None)
+        c, lower, upper = draw_values(
+            [self.c, self.lower, self.upper], point=point, size=size
+        )
+        return generate_samples(
+            stats.triang.rvs,
+            c=(c - lower) / (upper - lower),  # scipy expects the mode as a fraction of scale, not an offset
+            loc=lower,
+            scale=upper - lower,
+            size=size,
+            dist_shape=self.shape,
+            random_state=None,
+        )
 
     def logp(self, value):
         """
@@ -3338,13 +3438,19 @@ def logp(self, value):
         c = self.c
         lower = self.lower
         upper = self.upper
-        return tt.switch(alltrue_elemwise([lower <= value, value < c]),
-                         tt.log(2 * (value - lower) / ((upper - lower) * (c - lower))),
-                         tt.switch(tt.eq(value, c),
-                                   tt.log(2 / (upper - lower)),
-                                   tt.switch(alltrue_elemwise([c < value, value <= upper]),
-                                             tt.log(2 * (upper - value) / ((upper - lower) * (upper - c))),
-                                             np.inf)))
+        return tt.switch(
+            alltrue_elemwise([lower <= value, value < c]),  # rising edge: lower <= value < c
+            tt.log(2 * (value - lower) / ((upper - lower) * (c - lower))),
+            tt.switch(
+                tt.eq(value, c),  # peak of the density at the mode
+                tt.log(2 / (upper - lower)),
+                tt.switch(
+                    alltrue_elemwise([c < value, value <= upper]),  # falling edge: c < value <= upper
+                    tt.log(2 * (upper - value) / ((upper - lower) * (upper - c))),
+                    -np.inf,  # outside [lower, upper] the density is zero, so logp is -inf
+                ),
+            ),
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -3352,11 +3458,13 @@ def _repr_latex_(self, name=None, dist=None):
         lower = dist.lower
         upper = dist.upper
         c = dist.c
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Triangular}}(\mathit{{c}}={},~\mathit{{lower}}={},~\mathit{{upper}}={})$'.format(name,
-                                                                get_variable_name(c),
-                                                                get_variable_name(lower),
-                                                                get_variable_name(upper))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Triangular}}(\mathit{{c}}={},~\mathit{{lower}}={},~\mathit{{upper}}={})$".format(
+            name,
+            get_variable_name(c),
+            get_variable_name(lower),
+            get_variable_name(upper),
+        )
 
     def logcdf(self, value):
         l = self.lower
@@ -3371,14 +3479,14 @@ def logcdf(self, value):
                 tt.switch(
                     tt.lt(value, u),
                     tt.log1p(-((u - value) ** 2) / ((u - l) * (u - c))),
-                    0
-                )
-            )
+                    0,
+                ),
+            ),
         )
 
 
 class Gumbel(Continuous):
-    R"""
+    r"""
         Univariate Gumbel log-likelihood
 
     The pdf of this distribution is
@@ -3423,7 +3531,7 @@ def __init__(self, mu=0, beta=1.0, **kwargs):
         self.mu = tt.as_tensor_variable(mu)
         self.beta = tt.as_tensor_variable(beta)
 
-        assert_negative_support(beta, 'beta', 'Gumbel')
+        assert_negative_support(beta, "beta", "Gumbel")
 
         self.mean = self.mu + self.beta * np.euler_gamma
         self.median = self.mu - self.beta * tt.log(tt.log(2))
@@ -3450,9 +3558,9 @@ def random(self, point=None, size=None):
         array
         """
         mu, sd = draw_values([self.mu, self.beta], point=point, size=size)
-        return generate_samples(stats.gumbel_r.rvs, loc=mu, scale=sd,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            stats.gumbel_r.rvs, loc=mu, scale=sd, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -3476,20 +3584,20 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         beta = dist.beta
         mu = dist.mu
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Gumbel}}(\mathit{{mu}}={},~\mathit{{beta}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Gumbel}}(\mathit{{mu}}={},~\mathit{{beta}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(beta)
+        )
 
     def logcdf(self, value):
         beta = self.beta
         mu = self.mu
 
-        return -tt.exp(-(value - mu)/beta)
+        return -tt.exp(-(value - mu) / beta)
 
 
 class Rice(PositiveContinuous):
-    R"""
+    r"""
     Rice distribution.
 
     .. math::
@@ -3519,10 +3627,29 @@ def __init__(self, nu=None, sd=None, *args, **kwargs):
         super(Rice, self).__init__(*args, **kwargs)
         self.nu = nu = tt.as_tensor_variable(nu)
         self.sd = sd = tt.as_tensor_variable(sd)
-        self.mean = sd * np.sqrt(np.pi / 2) * tt.exp((-nu**2 / (2 * sd**2)) / 2) * ((1 - (-nu**2 / (2 * sd**2)))
-                                 * i0(-(-nu**2 / (2 * sd**2)) / 2) - (-nu**2 / (2 * sd**2)) * i1(-(-nu**2 / (2 * sd**2)) / 2))
-        self.variance = 2 * sd**2 + nu**2 - (np.pi * sd**2 / 2) * (tt.exp((-nu**2 / (2 * sd**2)) / 2) * ((1 - (-nu**2 / (
-            2 * sd**2))) * i0(-(-nu**2 / (2 * sd**2)) / 2) - (-nu**2 / (2 * sd**2)) * i1(-(-nu**2 / (2 * sd**2)) / 2)))**2
+        self.mean = (
+            sd
+            * np.sqrt(np.pi / 2)
+            * tt.exp((-nu ** 2 / (2 * sd ** 2)) / 2)
+            * (
+                (1 - (-nu ** 2 / (2 * sd ** 2))) * i0(-(-nu ** 2 / (2 * sd ** 2)) / 2)
+                - (-nu ** 2 / (2 * sd ** 2)) * i1(-(-nu ** 2 / (2 * sd ** 2)) / 2)
+            )
+        )
+        self.variance = (
+            2 * sd ** 2
+            + nu ** 2
+            - (np.pi * sd ** 2 / 2)
+            * (
+                tt.exp((-nu ** 2 / (2 * sd ** 2)) / 2)
+                * (
+                    (1 - (-nu ** 2 / (2 * sd ** 2)))
+                    * i0(-(-nu ** 2 / (2 * sd ** 2)) / 2)
+                    - (-nu ** 2 / (2 * sd ** 2)) * i1(-(-nu ** 2 / (2 * sd ** 2)) / 2)
+                )
+            )
+            ** 2
+        )
 
     def random(self, point=None, size=None):
         """
@@ -3541,10 +3668,10 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        nu, sd = draw_values([self.nu, self.sd],
-                             point=point, size=size)
-        return generate_samples(stats.rice.rvs, b=nu, scale=sd, loc=0,
-                                dist_shape=self.shape, size=size)
+        nu, sd = draw_values([self.nu, self.sd], point=point, size=size)
+        return generate_samples(
+            stats.rice.rvs, b=nu, scale=sd, loc=0, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         """
@@ -3563,15 +3690,16 @@ def logp(self, value):
         nu = self.nu
         sd = self.sd
         x = value / sd
-        return bound(tt.log(x * tt.exp((-(x - nu) * (x - nu)) / 2) * i0e(x * nu) / sd),
-                     sd >= 0,
-                     nu >= 0,
-                     value > 0,
-                     )
+        return bound(
+            tt.log(x * tt.exp((-(x - nu) * (x - nu)) / 2) * i0e(x * nu) / sd),
+            sd >= 0,
+            nu >= 0,
+            value > 0,
+        )
 
 
 class Logistic(Continuous):
-    R"""
+    r"""
     Logistic log-likelihood.
 
     The pdf of this distribution is
@@ -3613,14 +3741,14 @@ class Logistic(Continuous):
         Scale (s > 0).
     """
 
-    def __init__(self, mu=0., s=1., *args, **kwargs):
+    def __init__(self, mu=0.0, s=1.0, *args, **kwargs):
         super(Logistic, self).__init__(*args, **kwargs)
 
         self.mu = tt.as_tensor_variable(mu)
         self.s = tt.as_tensor_variable(s)
 
         self.mean = self.mode = mu
-        self.variance = s**2 * np.pi**2 / 3.
+        self.variance = s ** 2 * np.pi ** 2 / 3.0
 
     def logp(self, value):
         """
@@ -3640,7 +3768,9 @@ def logp(self, value):
         s = self.s
 
         return bound(
-            -(value - mu) / s - tt.log(s) - 2 * tt.log1p(tt.exp(-(value - mu) / s)), s > 0)
+            -(value - mu) / s - tt.log(s) - 2 * tt.log1p(tt.exp(-(value - mu) / s)),
+            s > 0,
+        )
 
     def random(self, point=None, size=None):
         """
@@ -3662,20 +3792,18 @@ def random(self, point=None, size=None):
         mu, s = draw_values([self.mu, self.s], point=point, size=size)
 
         return generate_samples(
-            stats.logistic.rvs,
-            loc=mu, scale=s,
-            dist_shape=self.shape,
-            size=size)
+            stats.logistic.rvs, loc=mu, scale=s, dist_shape=self.shape, size=size
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         mu = dist.mu
         s = dist.s
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Logistic}}(\mathit{{mu}}={},~\mathit{{s}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(s))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Logistic}}(\mathit{{mu}}={},~\mathit{{s}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(s)
+        )
 
     def logcdf(self, value):
         """
@@ -3689,21 +3817,20 @@ def logcdf(self, value):
         """
         mu = self.mu
         s = self.s
-        a = -(value - mu)/s
-        return - tt.switch(
+        a = -(value - mu) / s
+        return -tt.switch(
             tt.le(a, -37),
             tt.exp(a),
             tt.switch(
                 tt.le(a, 18),
                 tt.log1p(tt.exp(a)),
-                tt.switch(
-                    tt.le(a, 33.3),
-                    tt.exp(-a) + a,
-                    a)))
+                tt.switch(tt.le(a, 33.3), tt.exp(-a) + a, a),
+            ),
+        )
 
 
 class LogitNormal(UnitContinuous):
-    R"""
+    r"""
     Logit-Normal log-likelihood.
 
     The pdf of this distribution is
@@ -3753,8 +3880,8 @@ def __init__(self, mu=0, sd=None, tau=None, **kwargs):
         self.tau = tau = tt.as_tensor_variable(tau)
 
         self.median = invlogit(mu)
-        assert_negative_support(sd, 'sd', 'LogitNormal')
-        assert_negative_support(tau, 'tau', 'LogitNormal')
+        assert_negative_support(sd, "sd", "LogitNormal")
+        assert_negative_support(tau, "tau", "LogitNormal")
 
         super(LogitNormal, self).__init__(**kwargs)
 
@@ -3775,10 +3902,12 @@ def random(self, point=None, size=None):
         -------
         array
         """
-        mu, _, sd = draw_values(
-            [self.mu, self.tau, self.sd], point=point, size=size)
-        return expit(generate_samples(stats.norm.rvs, loc=mu, scale=sd, dist_shape=self.shape,
-                                      size=size))
+        mu, _, sd = draw_values([self.mu, self.tau, self.sd], point=point, size=size)
+        return expit(
+            generate_samples(
+                stats.norm.rvs, loc=mu, scale=sd, dist_shape=self.shape, size=size
+            )
+        )
 
     def logp(self, value):
         """
@@ -3797,23 +3926,28 @@ def logp(self, value):
         sd = self.sd
         mu = self.mu
         tau = self.tau
-        return bound(-0.5 * tau * (logit(value) - mu) ** 2
-                     + 0.5 * tt.log(tau / (2. * np.pi))
-                     - tt.log(value * (1 - value)), value > 0, value < 1, tau > 0)
+        return bound(
+            -0.5 * tau * (logit(value) - mu) ** 2
+            + 0.5 * tt.log(tau / (2.0 * np.pi))
+            - tt.log(value * (1 - value)),
+            value > 0,
+            value < 1,
+            tau > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         sd = dist.sd
         mu = dist.mu
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{LogitNormal}}(\mathit{{mu}}={},~\mathit{{sd}}={})$'.format(name,
-                                                                get_variable_name(mu),
-                                                                get_variable_name(sd))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{LogitNormal}}(\mathit{{mu}}={},~\mathit{{sd}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(sd)
+        )
 
 
 class Interpolated(BoundedContinuous):
-    R"""
+    r"""
     Univariate probability distribution defined as a linear interpolation
     of probability density function evaluated on some lattice of points.
 
@@ -3844,11 +3978,9 @@ def __init__(self, x_points, pdf_points, *args, **kwargs):
         self.lower = lower = tt.as_tensor_variable(x_points[0])
         self.upper = upper = tt.as_tensor_variable(x_points[-1])
 
-        super(Interpolated, self).__init__(lower=lower, upper=upper,
-                                           *args, **kwargs)
+        super(Interpolated, self).__init__(lower=lower, upper=upper, *args, **kwargs)
 
-        interp = InterpolatedUnivariateSpline(
-            x_points, pdf_points, k=1, ext='zeros')
+        interp = InterpolatedUnivariateSpline(x_points, pdf_points, k=1, ext="zeros")
         Z = interp.integral(x_points[0], x_points[-1])
 
         self.Z = tt.as_tensor_variable(Z)
@@ -3872,9 +4004,10 @@ def _argcdf(self, p):
             np.where(
                 np.abs(pdf[index]) <= 1e-8,
                 np.zeros(index.shape),
-                (p - cdf[index]) / pdf[index]
+                (p - cdf[index]) / pdf[index],
             ),
-            (-pdf[index] + np.sqrt(pdf[index] ** 2 + 2 * slope * (p - cdf[index]))) / slope
+            (-pdf[index] + np.sqrt(pdf[index] ** 2 + 2 * slope * (p - cdf[index])))
+            / slope,
         )
 
     def _random(self, size=None):
@@ -3894,9 +4027,7 @@ def random(self, size=None):
         -------
         array
         """
-        return generate_samples(self._random,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(self._random, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         """
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 86db441ae1..67da9f0694 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -10,14 +10,27 @@
 from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
 
 
-__all__ = ['Binomial',  'BetaBinomial',  'Bernoulli',  'DiscreteWeibull',
-           'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
-           'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial',
-           'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic']
+__all__ = [
+    "Binomial",
+    "BetaBinomial",
+    "Bernoulli",
+    "DiscreteWeibull",
+    "Poisson",
+    "NegativeBinomial",
+    "ConstantDist",
+    "Constant",
+    "ZeroInflatedPoisson",
+    "ZeroInflatedBinomial",
+    "ZeroInflatedNegativeBinomial",
+    "DiscreteUniform",
+    "Geometric",
+    "Categorical",
+    "OrderedLogistic",
+]
 
 
 class Binomial(Discrete):
-    R"""
+    r"""
     Binomial log-likelihood.
 
     The discrete probability distribution of the number of successes
@@ -66,9 +79,9 @@ def __init__(self, n, p, *args, **kwargs):
 
     def random(self, point=None, size=None):
         n, p = draw_values([self.n, self.p], point=point, size=size)
-        return generate_samples(stats.binom.rvs, n=n, p=p,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            stats.binom.rvs, n=n, p=p, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         n = self.n
@@ -76,21 +89,25 @@ def logp(self, value):
 
         return bound(
             binomln(n, value) + logpow(p, value) + logpow(1 - p, n - value),
-            0 <= value, value <= n,
-            0 <= p, p <= 1)
+            0 <= value,
+            value <= n,
+            0 <= p,
+            p <= 1,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         n = dist.n
         p = dist.p
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Binomial}}(\mathit{{n}}={},~\mathit{{p}}={})$'.format(name,
-                                                get_variable_name(n),
-                                                get_variable_name(p))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Binomial}}(\mathit{{n}}={},~\mathit{{p}}={})$".format(
+            name, get_variable_name(n), get_variable_name(p)
+        )
+
 
 class BetaBinomial(Discrete):
-    R"""
+    r"""
     Beta-binomial log-likelihood.
 
     Equivalent to binomial random variable with success probability
@@ -149,7 +166,7 @@ def __init__(self, alpha, beta, n, *args, **kwargs):
         self.alpha = alpha = tt.as_tensor_variable(alpha)
         self.beta = beta = tt.as_tensor_variable(beta)
         self.n = n = tt.as_tensor_variable(n)
-        self.mode = tt.cast(tround(alpha / (alpha + beta)), 'int8')
+        self.mode = tt.cast(tround(alpha / (alpha + beta)), "int8")
 
     def _random(self, alpha, beta, n, size=None):
         size = size or 1
@@ -163,42 +180,50 @@ def _random(self, alpha, beta, n, size=None):
 
         quotient, remainder = divmod(_p.shape[0], _n.shape[0])
         if remainder != 0:
-            raise TypeError('n has a bad size! Was cast to {}, must evenly divide {}'.format(
-                _n.shape[0], _p.shape[0]))
+            raise TypeError(
+                "n has a bad size! Was cast to {}, must evenly divide {}".format(
+                    _n.shape[0], _p.shape[0]
+                )
+            )
         if quotient != 1:
             _n = np.tile(_n, quotient)
         samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size)
         return samples
 
     def random(self, point=None, size=None):
-        alpha, beta, n = \
-            draw_values([self.alpha, self.beta, self.n], point=point, size=size)
-        return generate_samples(self._random, alpha=alpha, beta=beta, n=n,
-                                dist_shape=self.shape,
-                                size=size)
+        alpha, beta, n = draw_values(
+            [self.alpha, self.beta, self.n], point=point, size=size
+        )
+        return generate_samples(
+            self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         alpha = self.alpha
         beta = self.beta
-        return bound(binomln(self.n, value)
-                     + betaln(value + alpha, self.n - value + beta)
-                     - betaln(alpha, beta),
-                     value >= 0, value <= self.n,
-                     alpha > 0, beta > 0)
+        return bound(
+            binomln(self.n, value)
+            + betaln(value + alpha, self.n - value + beta)
+            - betaln(alpha, beta),
+            value >= 0,
+            value <= self.n,
+            alpha > 0,
+            beta > 0,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         alpha = dist.alpha
         beta = dist.beta
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{BetaBinomial}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$'.format(name,
-                                                get_variable_name(alpha),
-                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{BetaBinomial}}(\mathit{{alpha}}={},~\mathit{{beta}}={})$".format(
+            name, get_variable_name(alpha), get_variable_name(beta)
+        )
 
 
 class Bernoulli(Discrete):
-    R"""Bernoulli log-likelihood
+    r"""Bernoulli log-likelihood
 
     The Bernoulli distribution describes the probability of successes
     (x=1) and failures (x=0).
@@ -240,7 +265,7 @@ class Bernoulli(Discrete):
     def __init__(self, p=None, logit_p=None, *args, **kwargs):
         super(Bernoulli, self).__init__(*args, **kwargs)
         if sum(int(var is None) for var in [p, logit_p]) != 1:
-            raise ValueError('Specify one of p and logit_p')
+            raise ValueError("Specify one of p and logit_p")
         if p is not None:
             self._is_logit = False
             self.p = p = tt.as_tensor_variable(p)
@@ -250,13 +275,13 @@ def __init__(self, p=None, logit_p=None, *args, **kwargs):
             self.p = tt.nnet.sigmoid(logit_p)
             self._logit_p = tt.as_tensor_variable(logit_p)
 
-        self.mode = tt.cast(tround(self.p), 'int8')
+        self.mode = tt.cast(tround(self.p), "int8")
 
     def random(self, point=None, size=None):
         p = draw_values([self.p], point=point, size=size)[0]
-        return generate_samples(stats.bernoulli.rvs, p,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            stats.bernoulli.rvs, p, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         if self._is_logit:
@@ -266,20 +291,24 @@ def logp(self, value):
             p = self.p
             return bound(
                 tt.switch(value, tt.log(p), tt.log(1 - p)),
-                value >= 0, value <= 1,
-                p >= 0, p <= 1)
+                value >= 0,
+                value <= 1,
+                p >= 0,
+                p <= 1,
+            )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         p = dist.p
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Bernoulli}}(\mathit{{p}}={})$'.format(name,
-                                                get_variable_name(p))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Bernoulli}}(\mathit{{p}}={})$".format(
+            name, get_variable_name(p)
+        )
 
 
 class DiscreteWeibull(Discrete):
-    R"""Discrete Weibull log-likelihood
+    r"""Discrete Weibull log-likelihood
 
     The discrete Weibull distribution is a flexible model of count data that
     can handle both over- and under-dispersion.
@@ -316,8 +345,9 @@ def DiscreteWeibull(q, b, x):
     Variance  :math:`2 \sum_{x = 1}^{\infty} x q^{x^{\beta}} - \mu - \mu^2`
     ========  ======================
     """
+
     def __init__(self, q, beta, *args, **kwargs):
-        super(DiscreteWeibull, self).__init__(*args, defaults=('median',), **kwargs)
+        super(DiscreteWeibull, self).__init__(*args, defaults=("median",), **kwargs)
 
         self.q = q = tt.as_tensor_variable(q)
         self.beta = beta = tt.as_tensor_variable(beta)
@@ -328,10 +358,16 @@ def logp(self, value):
         q = self.q
         beta = self.beta
 
-        return bound(tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))),
-                     0 <= value,
-                     0 < q, q < 1,
-                     0 < beta)
+        return bound(
+            tt.log(
+                tt.power(q, tt.power(value, beta))
+                - tt.power(q, tt.power(value + 1, beta))
+            ),
+            0 <= value,
+            0 < q,
+            q < 1,
+            0 < beta,
+        )
 
     def _ppf(self, p):
         """
@@ -341,33 +377,33 @@ def _ppf(self, p):
         q = self.q
         beta = self.beta
 
-        return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1. / beta)) - 1).astype('int64')
+        return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1.0 / beta)) - 1).astype(
+            "int64"
+        )
 
     def _random(self, q, beta, size=None):
         p = np.random.uniform(size=size)
 
-        return np.ceil(np.power(np.log(1 - p) / np.log(q), 1. / beta)) - 1
+        return np.ceil(np.power(np.log(1 - p) / np.log(q), 1.0 / beta)) - 1
 
     def random(self, point=None, size=None):
         q, beta = draw_values([self.q, self.beta], point=point, size=size)
 
-        return generate_samples(self._random, q, beta,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         q = dist.q
         beta = dist.beta
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{DiscreteWeibull}}(\mathit{{q}}={},~\mathit{{beta}}={})$'.format(name,
-                                                get_variable_name(q),
-                                                get_variable_name(beta))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{DiscreteWeibull}}(\mathit{{q}}={},~\mathit{{beta}}={})$".format(
+            name, get_variable_name(q), get_variable_name(beta)
+        )
 
 
 class Poisson(Discrete):
-    R"""
+    r"""
     Poisson log-likelihood.
 
     Often used to model the number of events occurring in a fixed period
@@ -413,34 +449,30 @@ class Poisson(Discrete):
     def __init__(self, mu, *args, **kwargs):
         super(Poisson, self).__init__(*args, **kwargs)
         self.mu = mu = tt.as_tensor_variable(mu)
-        self.mode = tt.floor(mu).astype('int32')
+        self.mode = tt.floor(mu).astype("int32")
 
     def random(self, point=None, size=None):
         mu = draw_values([self.mu], point=point, size=size)[0]
-        return generate_samples(stats.poisson.rvs, mu,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(stats.poisson.rvs, mu, dist_shape=self.shape, size=size)
 
     def logp(self, value):
         mu = self.mu
-        log_prob = bound(
-            logpow(mu, value) - factln(value) - mu,
-            mu >= 0, value >= 0)
+        log_prob = bound(logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0)
         # Return zero when mu and value are both zero
-        return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0),
-                         0, log_prob)
+        return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         mu = dist.mu
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Poisson}}(\mathit{{mu}}={})$'.format(name,
-                                                get_variable_name(mu))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Poisson}}(\mathit{{mu}}={})$".format(
+            name, get_variable_name(mu)
+        )
 
 
 class NegativeBinomial(Discrete):
-    R"""
+    r"""
     Negative binomial log-likelihood.
 
     The negative binomial distribution describes a Poisson random variable
@@ -493,42 +525,46 @@ def __init__(self, mu, alpha, *args, **kwargs):
         super(NegativeBinomial, self).__init__(*args, **kwargs)
         self.mu = mu = tt.as_tensor_variable(mu)
         self.alpha = alpha = tt.as_tensor_variable(alpha)
-        self.mode = tt.floor(mu).astype('int32')
+        self.mode = tt.floor(mu).astype("int32")
 
     def random(self, point=None, size=None):
         mu, alpha = draw_values([self.mu, self.alpha], point=point, size=size)
-        g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha,
-                             dist_shape=self.shape,
-                             size=size)
+        g = generate_samples(
+            stats.gamma.rvs, alpha, scale=mu / alpha, dist_shape=self.shape, size=size
+        )
         g[g == 0] = np.finfo(float).eps  # Just in case
         return np.asarray(stats.poisson.rvs(g)).reshape(g.shape)
 
     def logp(self, value):
         mu = self.mu
         alpha = self.alpha
-        negbinom = bound(binomln(value + alpha - 1, value)
-                         + logpow(mu / (mu + alpha), value)
-                         + logpow(alpha / (mu + alpha), alpha),
-                         value >= 0, mu > 0, alpha > 0)
+        negbinom = bound(
+            binomln(value + alpha - 1, value)
+            + logpow(mu / (mu + alpha), value)
+            + logpow(alpha / (mu + alpha), alpha),
+            value >= 0,
+            mu > 0,
+            alpha > 0,
+        )
 
         # Return Poisson when alpha gets very large.
-        return tt.switch(tt.gt(alpha, 1e10),
-                         Poisson.dist(self.mu).logp(value),
-                         negbinom)
+        return tt.switch(
+            tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         mu = dist.mu
         alpha = dist.alpha
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{NegativeBinomial}}(\mathit{{mu}}={},~\mathit{{alpha}}={})$'.format(name,
-                                                get_variable_name(mu),
-                                                get_variable_name(alpha))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{NegativeBinomial}}(\mathit{{mu}}={},~\mathit{{alpha}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(alpha)
+        )
 
 
 class Geometric(Discrete):
-    R"""
+    r"""
     Geometric log-likelihood.
 
     The probability that the first success in a sequence of Bernoulli
@@ -571,26 +607,26 @@ def __init__(self, p, *args, **kwargs):
 
     def random(self, point=None, size=None):
         p = draw_values([self.p], point=point, size=size)[0]
-        return generate_samples(np.random.geometric, p,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            np.random.geometric, p, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         p = self.p
-        return bound(tt.log(p) + logpow(1 - p, value - 1),
-                     0 <= p, p <= 1, value >= 1)
+        return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         p = dist.p
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Geometric}}(\mathit{{p}}={})$'.format(name,
-                                                get_variable_name(p))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Geometric}}(\mathit{{p}}={})$".format(
+            name, get_variable_name(p)
+        )
 
 
 class DiscreteUniform(Discrete):
-    R"""
+    r"""
     Discrete uniform distribution.
     The pmf of this distribution is
 
@@ -630,10 +666,11 @@ class DiscreteUniform(Discrete):
 
     def __init__(self, lower, upper, *args, **kwargs):
         super(DiscreteUniform, self).__init__(*args, **kwargs)
-        self.lower = tt.floor(lower).astype('int32')
-        self.upper = tt.floor(upper).astype('int32')
+        self.lower = tt.floor(lower).astype("int32")
+        self.upper = tt.floor(upper).astype("int32")
         self.mode = tt.maximum(
-            tt.floor((upper + lower) / 2.).astype('int32'), self.lower)
+            tt.floor((upper + lower) / 2.0).astype("int32"), self.lower
+        )
 
     def _random(self, lower, upper, size=None):
         # This way seems to be the only to deal with lower and upper
@@ -643,30 +680,28 @@ def _random(self, lower, upper, size=None):
 
     def random(self, point=None, size=None):
         lower, upper = draw_values([self.lower, self.upper], point=point, size=size)
-        return generate_samples(self._random,
-                                lower, upper,
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            self._random, lower, upper, dist_shape=self.shape, size=size
+        )
 
     def logp(self, value):
         upper = self.upper
         lower = self.lower
-        return bound(-tt.log(upper - lower + 1),
-                     lower <= value, value <= upper)
+        return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         lower = dist.lower
         upper = dist.upper
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{DiscreteUniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$'.format(name,
-                                                get_variable_name(lower),
-                                                get_variable_name(upper))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{DiscreteUniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$".format(
+            name, get_variable_name(lower), get_variable_name(upper)
+        )
 
 
 class Categorical(Discrete):
-    R"""
+    r"""
     Categorical log-likelihood.
 
     The most general discrete distribution. The pmf of this distribution is
@@ -713,11 +748,13 @@ def __init__(self, p, *args, **kwargs):
     def random(self, point=None, size=None):
         p, k = draw_values([self.p, self.k], point=point, size=size)
 
-        return generate_samples(random_choice,
-                                p=p,
-                                broadcast_shape=p.shape[:-1] or (1,),
-                                dist_shape=self.shape,
-                                size=size)
+        return generate_samples(
+            random_choice,
+            p=p,
+            broadcast_shape=p.shape[:-1] or (1,),
+            dist_shape=self.shape,
+            size=size,
+        )
 
     def logp(self, value):
         p = self.p
@@ -726,8 +763,7 @@ def logp(self, value):
         # Clip values before using them for indexing
         value_clip = tt.clip(value, 0, k - 1)
 
-        sumto1 = theano.gradient.zero_grad(
-            tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5))
+        sumto1 = theano.gradient.zero_grad(tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5))
 
         if p.ndim > 1:
             a = tt.log(p[tt.arange(p.shape[0]), value_clip])
@@ -740,9 +776,10 @@ def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         p = dist.p
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Categorical}}(\mathit{{p}}={})$'.format(name,
-                                                get_variable_name(p))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Categorical}}(\mathit{{p}}={})$".format(
+            name, get_variable_name(p)
+        )
 
 
 class Constant(Discrete):
@@ -756,8 +793,10 @@ class Constant(Discrete):
     """
 
     def __init__(self, c, *args, **kwargs):
-        warnings.warn("Constant has been deprecated. We recommend using a Deterministic object instead.",
-                    DeprecationWarning)
+        warnings.warn(
+            "Constant has been deprecated. We recommend using a Deterministic object instead.",
+            DeprecationWarning,
+        )
         super(Constant, self).__init__(*args, **kwargs)
         self.mean = self.median = self.mode = self.c = c = tt.as_tensor_variable(c)
 
@@ -768,8 +807,9 @@ def random(self, point=None, size=None):
         def _random(c, dtype=dtype, size=None):
             return np.full(size, fill_value=c, dtype=dtype)
 
-        return generate_samples(_random, c=c, dist_shape=self.shape,
-                                size=size).astype(dtype)
+        return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(
+            dtype
+        )
 
     def logp(self, value):
         c = self.c
@@ -778,15 +818,15 @@ def logp(self, value):
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{Constant}}()$'.format(name)
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{Constant}}()$".format(name)
 
 
 ConstantDist = Constant
 
 
 class ZeroInflatedPoisson(Discrete):
-    R"""
+    r"""
     Zero-inflated Poisson log-likelihood.
 
     Often used to model the number of events occurring in a fixed period
@@ -844,9 +884,7 @@ def __init__(self, psi, theta, *args, **kwargs):
 
     def random(self, point=None, size=None):
         theta, psi = draw_values([self.theta, self.psi], point=point, size=size)
-        g = generate_samples(stats.poisson.rvs, theta,
-                             dist_shape=self.shape,
-                             size=size)
+        g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size)
         return g * (np.random.random(np.squeeze(g.shape)) < psi)
 
     def logp(self, value):
@@ -856,27 +894,24 @@ def logp(self, value):
         logp_val = tt.switch(
             tt.gt(value, 0),
             tt.log(psi) + self.pois.logp(value),
-            logaddexp(tt.log1p(-psi), tt.log(psi) - theta))
+            logaddexp(tt.log1p(-psi), tt.log(psi) - theta),
+        )
 
-        return bound(
-            logp_val,
-            0 <= value,
-            0 <= psi, psi <= 1,
-            0 <= theta)
+        return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         theta = dist.theta
         psi = dist.psi
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{ZeroInflatedPoisson}}(\mathit{{theta}}={},~\mathit{{psi}}={})$'.format(name,
-                                                get_variable_name(theta),
-                                                get_variable_name(psi))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{ZeroInflatedPoisson}}(\mathit{{theta}}={},~\mathit{{psi}}={})$".format(
+            name, get_variable_name(theta), get_variable_name(psi)
+        )
 
 
 class ZeroInflatedBinomial(Discrete):
-    R"""
+    r"""
     Zero-inflated Binomial log-likelihood.
 
     The pmf of this distribution is
@@ -936,9 +971,7 @@ def __init__(self, psi, n, p, *args, **kwargs):
 
     def random(self, point=None, size=None):
         n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size)
-        g = generate_samples(stats.binom.rvs, n, p,
-                             dist_shape=self.shape,
-                             size=size)
+        g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size)
         return g * (np.random.random(np.squeeze(g.shape)) < psi)
 
     def logp(self, value):
@@ -949,13 +982,12 @@ def logp(self, value):
         logp_val = tt.switch(
             tt.gt(value, 0),
             tt.log(psi) + self.bin.logp(value),
-            logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)))
+            logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)),
+        )
 
         return bound(
-            logp_val,
-            0 <= value, value <= n,
-            0 <= psi, psi <= 1,
-            0 <= p, p <= 1)
+            logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -967,15 +999,16 @@ def _repr_latex_(self, name=None, dist=None):
         name_n = get_variable_name(n)
         name_p = get_variable_name(p)
         name_psi = get_variable_name(psi)
-        name = r'\text{%s}' % name
-        return (r'${} \sim \text{{ZeroInflatedBinomial}}'
-                r'(\mathit{{n}}={},~\mathit{{p}}={},~'
-                r'\mathit{{psi}}={})$'
-                .format(name, name_n, name_p, name_psi))
+        name = r"\text{%s}" % name
+        return (
+            r"${} \sim \text{{ZeroInflatedBinomial}}"
+            r"(\mathit{{n}}={},~\mathit{{p}}={},~"
+            r"\mathit{{psi}}={})$".format(name, name_n, name_p, name_psi)
+        )
 
 
 class ZeroInflatedNegativeBinomial(Discrete):
-    R"""
+    r"""
     Zero-Inflated Negative binomial log-likelihood.
 
     The Zero-inflated version of the Negative Binomial (NB).
@@ -1052,10 +1085,11 @@ def __init__(self, psi, mu, alpha, *args, **kwargs):
 
     def random(self, point=None, size=None):
         mu, alpha, psi = draw_values(
-            [self.mu, self.alpha, self.psi], point=point, size=size)
-        g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha,
-                             dist_shape=self.shape,
-                             size=size)
+            [self.mu, self.alpha, self.psi], point=point, size=size
+        )
+        g = generate_samples(
+            stats.gamma.rvs, alpha, scale=mu / alpha, dist_shape=self.shape, size=size
+        )
         g[g == 0] = np.finfo(float).eps  # Just in case
         return stats.poisson.rvs(g) * (np.random.random(np.squeeze(g.shape)) < psi)
 
@@ -1066,19 +1100,12 @@ def logp(self, value):
 
         logp_other = tt.log(psi) + self.nb.logp(value)
         logp_0 = logaddexp(
-            tt.log1p(-psi),
-            tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu)))
+            tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))
+        )
 
-        logp_val = tt.switch(
-            tt.gt(value, 0),
-            logp_other,
-            logp_0)
+        logp_val = tt.switch(tt.gt(value, 0), logp_other, logp_0)
 
-        return bound(
-            logp_val,
-            0 <= value,
-            0 <= psi, psi <= 1,
-            mu > 0, alpha > 0)
+        return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -1090,15 +1117,16 @@ def _repr_latex_(self, name=None, dist=None):
         name_mu = get_variable_name(mu)
         name_alpha = get_variable_name(alpha)
         name_psi = get_variable_name(psi)
-        name = r'\text{%s}' % name
-        return (r'${} \sim \text{{ZeroInflatedNegativeBinomial}}'
-                r'(\mathit{{mu}}={},~\mathit{{alpha}}={},~'
-                r'\mathit{{psi}}={})$'
-                .format(name, name_mu, name_alpha, name_psi))
+        name = r"\text{%s}" % name
+        return (
+            r"${} \sim \text{{ZeroInflatedNegativeBinomial}}"
+            r"(\mathit{{mu}}={},~\mathit{{alpha}}={},~"
+            r"\mathit{{psi}}={})$".format(name, name_mu, name_alpha, name_psi)
+        )
 
 
 class OrderedLogistic(Categorical):
-    R"""
+    r"""
     Ordered Logistic log-likelihood.
 
     Useful for regression on ordinal data values whose values range
@@ -1167,11 +1195,14 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
         self.cutpoints = tt.as_tensor_variable(cutpoints)
 
         pa = sigmoid(tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta))
-        p_cum = tt.concatenate([
-            tt.zeros_like(tt.shape_padright(pa[:, 0])),
-            pa,
-            tt.ones_like(tt.shape_padright(pa[:, 0]))
-        ], axis=1)
+        p_cum = tt.concatenate(
+            [
+                tt.zeros_like(tt.shape_padright(pa[:, 0])),
+                pa,
+                tt.ones_like(tt.shape_padright(pa[:, 0])),
+            ],
+            axis=1,
+        )
         p = p_cum[:, 1:] - p_cum[:, :-1]
 
         super(OrderedLogistic, self).__init__(p=p, *args, **kwargs)
@@ -1181,6 +1212,9 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         name_eta = get_variable_name(dist.eta)
         name_cutpoints = get_variable_name(dist.cutpoints)
-        return (r'${} \sim \text{{OrderedLogistic}}'
-                r'(\mathit{{eta}}={}, \mathit{{cutpoints}}={}$'
-                .format(name, name_eta, name_cutpoints))
+        return (
+            r"${} \sim \text{{OrderedLogistic}}"
+            r"(\mathit{{eta}}={}, \mathit{{cutpoints}}={}$".format(
+                name, name_eta, name_cutpoints
+            )
+        )
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index 7ea1b6b28d..d12fb766c2 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -1,8 +1,8 @@
-'''
+"""
 Created on Mar 7, 2011
 
 @author: johnsalvatier
-'''
+"""
 from __future__ import division
 
 import numpy as np
@@ -19,7 +19,7 @@
 
 
 f = floatX
-c = - .5 * np.log(2. * np.pi)
+c = -0.5 * np.log(2.0 * np.pi)
 
 
 def bound(logp, *conditions, **kwargs):
@@ -41,7 +41,7 @@ def bound(logp, *conditions, **kwargs):
     -------
     logp with elements set to -inf where any condition is False
     """
-    broadcast_conditions = kwargs.get('broadcast_conditions', True)
+    broadcast_conditions = kwargs.get("broadcast_conditions", True)
 
     if broadcast_conditions:
         alltrue = alltrue_elemwise
@@ -86,7 +86,7 @@ def std_cdf(x):
     """
     Calculates the standard normal cumulative distribution function.
     """
-    return .5 + .5 * tt.erf(x / tt.sqrt(2.))
+    return 0.5 + 0.5 * tt.erf(x / tt.sqrt(2.0))
 
 
 def normal_lcdf(mu, sigma, x):
@@ -94,8 +94,8 @@ def normal_lcdf(mu, sigma, x):
     z = (x - mu) / sigma
     return tt.switch(
         tt.lt(z, -1.0),
-        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
-        tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
+        tt.log(tt.erfcx(-z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2.0,
+        tt.log1p(-tt.erfc(z / tt.sqrt(2.0)) / 2.0),
     )
 
 
@@ -103,8 +103,8 @@ def normal_lccdf(mu, sigma, x):
     z = (x - mu) / sigma
     return tt.switch(
         tt.gt(z, 1.0),
-        tt.log(tt.erfcx(z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
-        tt.log1p(-tt.erfc(-z / tt.sqrt(2.)) / 2.)
+        tt.log(tt.erfcx(z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2.0,
+        tt.log1p(-tt.erfc(-z / tt.sqrt(2.0)) / 2.0),
     )
 
 
@@ -112,7 +112,7 @@ def sd2rho(sd):
     """
     `sd -> rho` theano converter
     :math:`mu + sd*e = mu + log(1+exp(rho))*e`"""
-    return tt.log(tt.exp(tt.abs_(sd)) - 1.)
+    return tt.log(tt.exp(tt.abs_(sd)) - 1.0)
 
 
 def rho2sd(rho):
@@ -145,16 +145,16 @@ def log_normal(x, mean, **kwargs):
         4) `tau` that follows this equation :math:`tau = std^{-1}`
     ----
     """
-    sd = kwargs.get('sd')
-    w = kwargs.get('w')
-    rho = kwargs.get('rho')
-    tau = kwargs.get('tau')
-    eps = kwargs.get('eps', 0.)
+    sd = kwargs.get("sd")
+    w = kwargs.get("w")
+    rho = kwargs.get("rho")
+    tau = kwargs.get("tau")
+    eps = kwargs.get("eps", 0.0)
     check = sum(map(lambda a: a is not None, [sd, w, rho, tau]))
     if check > 1:
-        raise ValueError('more than one required kwarg is passed')
+        raise ValueError("more than one required kwarg is passed")
     if check == 0:
-        raise ValueError('none of required kwarg is passed')
+        raise ValueError("none of required kwarg is passed")
     if sd is not None:
         std = sd
     elif w is not None:
@@ -162,9 +162,9 @@ def log_normal(x, mean, **kwargs):
     elif rho is not None:
         std = rho2sd(rho)
     else:
-        std = tau**(-1)
+        std = tau ** (-1)
     std += f(eps)
-    return f(c) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2. * std ** 2)
+    return f(c) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2.0 * std ** 2)
 
 
 def MvNormalLogp():
@@ -179,14 +179,14 @@ def MvNormalLogp():
     delta : tt.matrix
         Array of deviations from the mean.
     """
-    cov = tt.matrix('cov')
+    cov = tt.matrix("cov")
     cov.tag.test_value = floatX(np.eye(3))
-    delta = tt.matrix('delta')
+    delta = tt.matrix("delta")
     delta.tag.test_value = floatX(np.zeros((2, 3)))
 
-    solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
-    solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
-    cholesky = Cholesky(lower=True, on_error='nan')
+    solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
+    solve_upper = tt.slinalg.Solve(A_structure="upper_triangular")
+    cholesky = Cholesky(lower=True, on_error="nan")
 
     n, k = delta.shape
     n, k = f(n), f(k)
@@ -200,14 +200,14 @@ def MvNormalLogp():
     result = n * k * tt.log(f(2) * np.pi)
     result += f(2) * n * tt.sum(tt.log(diag))
     result += (delta_trans ** f(2)).sum()
-    result = f(-.5) * result
+    result = f(-0.5) * result
     logp = tt.switch(ok, result, -np.inf)
 
     def dlogp(inputs, gradients):
         g_logp, = gradients
         cov, delta = inputs
 
-        g_logp.tag.test_value = floatX(1.)
+        g_logp.tag.test_value = floatX(1.0)
         n, k = delta.shape
 
         chol_cov = cholesky(cov)
@@ -229,8 +229,7 @@ def dlogp(inputs, gradients):
 
         return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
 
-    return theano.OpFromGraph(
-        [cov, delta], [logp], grad_overrides=dlogp, inline=True)
+    return theano.OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)
 
 
 class SplineWrapper(theano.Op):
@@ -238,7 +237,7 @@ class SplineWrapper(theano.Op):
     Creates a theano operation from scipy.interpolate.UnivariateSpline
     """
 
-    __props__ = ('spline',)
+    __props__ = ("spline",)
 
     def __init__(self, spline):
         self.spline = spline
@@ -249,14 +248,14 @@ def make_node(self, x):
 
     @property
     def grad_op(self):
-        if not hasattr(self, '_grad_op'):
+        if not hasattr(self, "_grad_op"):
             try:
                 self._grad_op = SplineWrapper(self.spline.derivative())
             except ValueError:
                 self._grad_op = None
 
         if self._grad_op is None:
-            raise NotImplementedError('Spline of order 0 is not differentiable')
+            raise NotImplementedError("Spline of order 0 is not differentiable")
         return self._grad_op
 
     def perform(self, node, inputs, output_storage):
@@ -270,18 +269,18 @@ def grad(self, inputs, grads):
         return [x_grad * self.grad_op(x)]
 
 
-
 class I0e(UnaryScalarOp):
     """
     Modified Bessel function of the first kind of order 0, exponentially scaled.
     """
-    nfunc_spec = ('scipy.special.i0e', 1, 1)
+
+    nfunc_spec = ("scipy.special.i0e", 1, 1)
 
     def impl(self, x):
         return scipy.special.i0e(x)
 
 
-i0e = I0e(upgrade_to_float, name='i0e')
+i0e = I0e(upgrade_to_float, name="i0e")
 
 
 def random_choice(*args, **kwargs):
@@ -299,8 +298,8 @@ def random_choice(*args, **kwargs):
         random sample: array
 
     """
-    p = kwargs.pop('p')
-    size = kwargs.pop('size')
+    p = kwargs.pop("p")
+    size = kwargs.pop("size")
     k = p.shape[-1]
 
     if p.ndim > 1:
@@ -319,17 +318,17 @@ def zvalue(value, sd, mu):
 
 
 def incomplete_beta_cfe(a, b, x, small):
-    '''Incomplete beta continued fraction expansions
+    """Incomplete beta continued fraction expansions
     based on Cephes library by Steve Moshier (incbet.c).
     small: Choose element-wise which continued fraction expansion to use.
-    '''
-    BIG = tt.constant(4.503599627370496e15, dtype='float64')
-    BIGINV = tt.constant(2.22044604925031308085e-16, dtype='float64')
-    THRESH = tt.constant(3. * np.MachAr().eps, dtype='float64')
+    """
+    BIG = tt.constant(4.503599627370496e15, dtype="float64")
+    BIGINV = tt.constant(2.22044604925031308085e-16, dtype="float64")
+    THRESH = tt.constant(3.0 * np.MachAr().eps, dtype="float64")
 
-    zero = tt.constant(0., dtype='float64')
-    one = tt.constant(1., dtype='float64')
-    two = tt.constant(2., dtype='float64')
+    zero = tt.constant(0.0, dtype="float64")
+    one = tt.constant(1.0, dtype="float64")
+    two = tt.constant(2.0, dtype="float64")
 
     r = one
     k1 = a
@@ -350,11 +349,7 @@ def incomplete_beta_cfe(a, b, x, small):
     qkm1 = one
     r = one
 
-    def _step(
-            i,
-            pkm1, pkm2, qkm1, qkm2,
-            k1, k2, k3, k4, k5, k6, k7, k8, r
-    ):
+    def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
         xk = -(x * k1 * k2) / (k3 * k4)
         pk = pkm1 + pkm2 * xk
         qk = qkm1 + qkm2 * xk
@@ -372,7 +367,7 @@ def _step(
         qkm1 = qk
 
         old_r = r
-        r = tt.switch(tt.eq(qk, zero), r, pk/qk)
+        r = tt.switch(tt.eq(qk, zero), r, pk / qk)
 
         k1 += one
         k2 += k26update
@@ -384,10 +379,7 @@ def _step(
         k8 += two
 
         big_cond = tt.gt(tt.abs_(qk) + tt.abs_(pk), BIG)
-        biginv_cond = tt.or_(
-            tt.lt(tt.abs_(qk), BIGINV),
-            tt.lt(tt.abs_(pk), BIGINV)
-        )
+        biginv_cond = tt.or_(tt.lt(tt.abs_(qk), BIGINV), tt.lt(tt.abs_(pk), BIGINV))
 
         pkm2 = tt.switch(big_cond, pkm2 * BIGINV, pkm2)
         pkm1 = tt.switch(big_cond, pkm1 * BIGINV, pkm1)
@@ -399,37 +391,37 @@ def _step(
         qkm2 = tt.switch(biginv_cond, qkm2 * BIG, qkm2)
         qkm1 = tt.switch(biginv_cond, qkm1 * BIG, qkm1)
 
-        return ((pkm1, pkm2, qkm1, qkm2,
-                 k1, k2, k3, k4, k5, k6, k7, k8, r),
-                until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r))))
+        return (
+            (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
+            until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r))),
+        )
 
-    (pkm1, pkm2, qkm1, qkm2,
-     k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan(
+    (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan(
         _step,
         sequences=[tt.arange(0, 300)],
         outputs_info=[
-            e for e in
-            tt.cast((pkm1, pkm2, qkm1, qkm2,
-                     k1, k2, k3, k4, k5, k6, k7, k8, r),
-                    'float64')
-        ]
+            e
+            for e in tt.cast(
+                (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64"
+            )
+        ],
     )
 
     return r[-1]
 
 
 def incomplete_beta_ps(a, b, value):
-    '''Power series for incomplete beta
+    """Power series for incomplete beta
     Use when b*x is small and value not too close to 1.
     Based on Cephes library by Steve Moshier (incbet.c)
-    '''
-    one = tt.constant(1, dtype='float64')
+    """
+    one = tt.constant(1, dtype="float64")
     ai = one / a
     u = (one - b) * value
     t1 = u / (a + one)
     t = u
     threshold = np.MachAr().eps * ai
-    s = tt.constant(0, dtype='float64')
+    s = tt.constant(0, dtype="float64")
 
     def _step(i, t, s):
         t *= (i - b) * value / i
@@ -440,30 +432,22 @@ def _step(i, t, s):
     (t, s), _ = scan(
         _step,
         sequences=[tt.arange(2, 302)],
-        outputs_info=[
-            e for e in
-            tt.cast((t, s),
-                    'float64')
-        ]
+        outputs_info=[e for e in tt.cast((t, s), "float64")],
     )
 
     s = s[-1] + t1 + ai
 
-    t = (
-        gammaln(a + b) - gammaln(a) - gammaln(b) +
-        a * tt.log(value) +
-        tt.log(s)
-    )
+    t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * tt.log(value) + tt.log(s)
     return tt.exp(t)
 
 
 def incomplete_beta(a, b, value):
-    '''Incomplete beta implementation
+    """Incomplete beta implementation
     Power series and continued fraction expansions chosen for best numerical
     convergence across the board based on inputs.
-    '''
-    machep = tt.constant(np.MachAr().eps, dtype='float64')
-    one = tt.constant(1, dtype='float64')
+    """
+    machep = tt.constant(np.MachAr().eps, dtype="float64")
+    one = tt.constant(1, dtype="float64")
     w = one - value
 
     ps = incomplete_beta_ps(a, b, value)
@@ -485,20 +469,17 @@ def incomplete_beta(a, b, value):
 
     # Direct incomplete beta accounting for flipped a, b.
     t = tt.exp(
-        a * tt.log(x) + b * tt.log(xc) +
-        gammaln(a + b) - gammaln(a) - gammaln(b) +
-        tt.log(w / a)
+        a * tt.log(x)
+        + b * tt.log(xc)
+        + gammaln(a + b)
+        - gammaln(a)
+        - gammaln(b)
+        + tt.log(w / a)
     )
 
-    t = tt.switch(
-        flip,
-        tt.switch(tt.le(t, machep), one - machep, one - t),
-        t
-    )
+    t = tt.switch(flip, tt.switch(tt.le(t, machep), one - machep, one - t), t)
     return tt.switch(
         tt.and_(flip, tt.and_(tt.le((b * x), one), tt.le(x, 0.95))),
         tps,
-        tt.switch(
-            tt.and_(tt.le(b * value, one), tt.le(value, 0.95)),
-            ps,
-            t))
+        tt.switch(tt.and_(tt.le(b * value, one), tt.le(value, 0.95)), ps, t),
+    )
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 5783609765..28c97492af 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -7,13 +7,24 @@
 import theano
 from ..memoize import memoize
 from ..model import (
-    Model, get_named_nodes_and_relations, FreeRV,
-    ObservedRV, MultiObservedRV
+    Model,
+    get_named_nodes_and_relations,
+    FreeRV,
+    ObservedRV,
+    MultiObservedRV,
 )
 from ..vartypes import string_types
 
-__all__ = ['DensityDist', 'Distribution', 'Continuous', 'Discrete',
-           'NoDistribution', 'TensorType', 'draw_values', 'generate_samples']
+__all__ = [
+    "DensityDist",
+    "Distribution",
+    "Continuous",
+    "Discrete",
+    "NoDistribution",
+    "TensorType",
+    "draw_values",
+    "generate_samples",
+]
 
 
 class _Unpickling(object):
@@ -22,29 +33,34 @@ class _Unpickling(object):
 
 class Distribution(object):
     """Statistical distribution"""
+
     def __new__(cls, name, *args, **kwargs):
         if name is _Unpickling:
             return object.__new__(cls)  # for pickle
         try:
             model = Model.get_context()
         except TypeError:
-            raise TypeError("No model on context stack, which is needed to "
-                            "instantiate distributions. Add variable inside "
-                            "a 'with model:' block, or use the '.dist' syntax "
-                            "for a standalone distribution.")
+            raise TypeError(
+                "No model on context stack, which is needed to "
+                "instantiate distributions. Add variable inside "
+                "a 'with model:' block, or use the '.dist' syntax "
+                "for a standalone distribution."
+            )
 
         if isinstance(name, string_types):
-            data = kwargs.pop('observed', None)
+            data = kwargs.pop("observed", None)
             if isinstance(data, ObservedRV) or isinstance(data, FreeRV):
-                raise TypeError("observed needs to be data but got: {}".format(type(data)))
-            total_size = kwargs.pop('total_size', None)
+                raise TypeError(
+                    "observed needs to be data but got: {}".format(type(data))
+                )
+            total_size = kwargs.pop("total_size", None)
             dist = cls.dist(*args, **kwargs)
             return model.Var(name, dist, data, total_size)
         else:
             raise TypeError("Name needs to be a string but got: {}".format(name))
 
     def __getnewargs__(self):
-        return _Unpickling,
+        return (_Unpickling,)
 
     @classmethod
     def dist(cls, *args, **kwargs):
@@ -52,8 +68,15 @@ def dist(cls, *args, **kwargs):
         dist.__init__(*args, **kwargs)
         return dist
 
-    def __init__(self, shape, dtype, testval=None, defaults=(),
-                 transform=None, broadcastable=None):
+    def __init__(
+        self,
+        shape,
+        dtype,
+        testval=None,
+        defaults=(),
+        transform=None,
+        broadcastable=None,
+    ):
         self.shape = np.atleast_1d(shape)
         if False in (np.floor(self.shape) == self.shape):
             raise TypeError("Expected int elements in shape")
@@ -75,10 +98,11 @@ def get_test_val(self, val, defaults):
             return self.getattr_value(val)
 
         if val is None:
-            raise AttributeError("%s has no finite default value to use, "
-                                 "checked: %s. Pass testval argument or "
-                                 "adjust so value is finite."
-                                 % (self, str(defaults)))
+            raise AttributeError(
+                "%s has no finite default value to use, "
+                "checked: %s. Pass testval argument or "
+                "adjust so value is finite." % (self, str(defaults))
+            )
 
     def getattr_value(self, val):
         if isinstance(val, string_types):
@@ -128,20 +152,32 @@ def TensorType(dtype, shape, broadcastable=None):
 
 
 class NoDistribution(Distribution):
-
-    def __init__(self, shape, dtype, testval=None, defaults=(),
-                 transform=None, parent_dist=None, *args, **kwargs):
-        super(NoDistribution, self).__init__(shape=shape, dtype=dtype,
-                                             testval=testval, defaults=defaults,
-                                             *args, **kwargs)
+    def __init__(
+        self,
+        shape,
+        dtype,
+        testval=None,
+        defaults=(),
+        transform=None,
+        parent_dist=None,
+        *args,
+        **kwargs
+    ):
+        super(NoDistribution, self).__init__(
+            shape=shape,
+            dtype=dtype,
+            testval=testval,
+            defaults=defaults,
+            *args,
+            **kwargs
+        )
         self.parent_dist = parent_dist
 
     def __getattr__(self, name):
         # Do not use __getstate__ and __setstate__ from parent_dist
         # to avoid infinite recursion during unpickling
-        if name.startswith('__'):
-            raise AttributeError(
-                "'NoDistribution' has no attribute '%s'" % name)
+        if name.startswith("__"):
+            raise AttributeError("'NoDistribution' has no attribute '%s'" % name)
         return getattr(self.parent_dist, name)
 
     def logp(self, x):
@@ -151,33 +187,34 @@ def logp(self, x):
 class Discrete(Distribution):
     """Base class for discrete distributions"""
 
-    def __init__(self, shape=(), dtype=None, defaults=('mode',),
-                 *args, **kwargs):
+    def __init__(self, shape=(), dtype=None, defaults=("mode",), *args, **kwargs):
         if dtype is None:
-            if theano.config.floatX == 'float32':
-                dtype = 'int16'
+            if theano.config.floatX == "float32":
+                dtype = "int16"
             else:
-                dtype = 'int64'
-        if dtype != 'int16' and dtype != 'int64':
-            raise TypeError('Discrete classes expect dtype to be int16 or int64.')
+                dtype = "int64"
+        if dtype != "int16" and dtype != "int64":
+            raise TypeError("Discrete classes expect dtype to be int16 or int64.")
 
-        if kwargs.get('transform', None) is not None:
-            raise ValueError("Transformations for discrete distributions "
-                             "are not allowed.")
+        if kwargs.get("transform", None) is not None:
+            raise ValueError(
+                "Transformations for discrete distributions " "are not allowed."
+            )
 
-        super(Discrete, self).__init__(
-            shape, dtype, defaults=defaults, *args, **kwargs)
+        super(Discrete, self).__init__(shape, dtype, defaults=defaults, *args, **kwargs)
 
 
 class Continuous(Distribution):
     """Base class for continuous distributions"""
 
-    def __init__(self, shape=(), dtype=None, defaults=('median', 'mean', 'mode'),
-                 *args, **kwargs):
+    def __init__(
+        self, shape=(), dtype=None, defaults=("median", "mean", "mode"), *args, **kwargs
+    ):
         if dtype is None:
             dtype = theano.config.floatX
         super(Continuous, self).__init__(
-            shape, dtype, defaults=defaults, *args, **kwargs)
+            shape, dtype, defaults=defaults, *args, **kwargs
+        )
 
 
 class DensityDist(Distribution):
@@ -198,11 +235,12 @@ class DensityDist(Distribution):
 
     """
 
-    def __init__(self, logp, shape=(), dtype=None, testval=0, random=None, *args, **kwargs):
+    def __init__(
+        self, logp, shape=(), dtype=None, testval=0, random=None, *args, **kwargs
+    ):
         if dtype is None:
             dtype = theano.config.floatX
-        super(DensityDist, self).__init__(
-            shape, dtype, testval, *args, **kwargs)
+        super(DensityDist, self).__init__(shape, dtype, testval, *args, **kwargs)
         self.logp = logp
         self.rand = random
 
@@ -210,8 +248,10 @@ def random(self, *args, **kwargs):
         if self.rand is not None:
             return self.rand(*args, **kwargs)
         else:
-            raise ValueError("Distribution was not passed any random method "
-                            "Define a custom random method and pass it as kwarg random")
+            raise ValueError(
+                "Distribution was not passed any random method "
+                "Define a custom random method and pass it as kwarg random"
+            )
 
 
 def draw_values(params, point=None, size=None):
@@ -239,7 +279,7 @@ def draw_values(params, point=None, size=None):
     named_nodes_parents = {}
     named_nodes_children = {}
     for param in params:
-        if hasattr(param, 'name'):
+        if hasattr(param, "name"):
             # Get the named nodes under the `param` node
             nn, nnp, nnc = get_named_nodes_and_relations(param)
             leaf_nodes.update(nn)
@@ -265,8 +305,7 @@ def draw_values(params, point=None, size=None):
         if next_ in stored:
             # If the node already has a givens value, skip it
             continue
-        elif isinstance(next_, (tt.TensorConstant,
-                                tt.sharedvar.SharedVariable)):
+        elif isinstance(next_, (tt.TensorConstant, tt.sharedvar.SharedVariable)):
             # If the node is a theano.tensor.TensorConstant or a
             # theano.tensor.sharedvar.SharedVariable, its value will be
             # available automatically in _compile_theano_function so
@@ -287,19 +326,24 @@ def draw_values(params, point=None, size=None):
             try:
                 # This may fail for autotransformed RVs, which don't
                 # have the random method
-                givens[next_.name] = (next_, _draw_value(next_,
-                                                         point=point,
-                                                         givens=temp_givens,
-                                                         size=size))
+                givens[next_.name] = (
+                    next_,
+                    _draw_value(next_, point=point, givens=temp_givens, size=size),
+                )
                 stored.add(next_.name)
             except theano.gof.fg.MissingInputError:
                 # The node failed, so we must add the node's parents to
                 # the stack of nodes to try to draw from. We exclude the
                 # nodes in the `params` list.
-                stack.extend([node for node in named_nodes_parents[next_]
-                              if node is not None and
-                              node.name not in stored and
-                              node not in params])
+                stack.extend(
+                    [
+                        node
+                        for node in named_nodes_parents[next_]
+                        if node is not None
+                        and node.name not in stored
+                        and node not in params
+                    ]
+                )
 
     # the below makes sure the graph is evaluated in order
     # test_distributions_random::TestDrawValues::test_draw_order fails without it
@@ -309,22 +353,28 @@ def draw_values(params, point=None, size=None):
     missing_inputs = set(params)
     while to_eval or missing_inputs:
         if to_eval == missing_inputs:
-            raise ValueError('Cannot resolve inputs for {}'.format([str(params[j]) for j in to_eval]))
+            raise ValueError(
+                "Cannot resolve inputs for {}".format([str(params[j]) for j in to_eval])
+            )
         to_eval = set(missing_inputs)
         missing_inputs = set()
         for param_idx in to_eval:
             param = params[param_idx]
-            if hasattr(param, 'name') and param.name in givens:
+            if hasattr(param, "name") and param.name in givens:
                 evaluated[param_idx] = givens[param.name][1]
             else:
                 try:  # might evaluate in a bad order,
-                    evaluated[param_idx] = _draw_value(param, point=point, givens=givens.values(), size=size)
-                    if isinstance(param, collections.Hashable) and named_nodes_parents.get(param):
+                    evaluated[param_idx] = _draw_value(
+                        param, point=point, givens=givens.values(), size=size
+                    )
+                    if isinstance(
+                        param, collections.Hashable
+                    ) and named_nodes_parents.get(param):
                         givens[param.name] = (param, evaluated[param_idx])
                 except theano.gof.fg.MissingInputError:
                     missing_inputs.add(param_idx)
 
-    return [evaluated[j] for j in params] # set the order back
+    return [evaluated[j] for j in params]  # set the order back
 
 
 @memoize
@@ -346,10 +396,14 @@ def _compile_theano_function(param, vars, givens=None):
     A compiled theano function that takes the values of `vars` as input
         positional args
     """
-    return function(vars, param, givens=givens,
-                    rebuild_strict=True,
-                    on_unused_input='ignore',
-                    allow_input_downcast=True)
+    return function(
+        vars,
+        param,
+        givens=givens,
+        rebuild_strict=True,
+        on_unused_input="ignore",
+        allow_input_downcast=True,
+    )
 
 
 def _draw_value(param, point=None, givens=None, size=None):
@@ -378,14 +432,16 @@ def _draw_value(param, point=None, givens=None, size=None):
     elif isinstance(param, tt.sharedvar.SharedVariable):
         return param.get_value()
     elif isinstance(param, (tt.TensorVariable, MultiObservedRV)):
-        if point and hasattr(param, 'model') and param.name in point:
+        if point and hasattr(param, "model") and param.name in point:
             return point[param.name]
-        elif hasattr(param, 'random') and param.random is not None:
+        elif hasattr(param, "random") and param.random is not None:
             return param.random(point=point, size=size)
-        elif (hasattr(param, 'distribution') and
-                hasattr(param.distribution, 'random') and
-                param.distribution.random is not None):
-            if hasattr(param, 'observations'):
+        elif (
+            hasattr(param, "distribution")
+            and hasattr(param.distribution, "random")
+            and param.distribution.random is not None
+        ):
+            if hasattr(param, "observations"):
                 # shape inspection for ObservedRV
                 dist_tmp = param.distribution
                 try:
@@ -411,11 +467,17 @@ def _draw_value(param, point=None, givens=None, size=None):
             else:
                 variables = values = []
             func = _compile_theano_function(param, variables)
-            if size and values and not all(var.dshape == val.shape for var, val in zip(variables, values)):
+            if (
+                size
+                and values
+                and not all(
+                    var.dshape == val.shape for var, val in zip(variables, values)
+                )
+            ):
                 return np.array([func(*v) for v in zip(*values)])
             else:
                 return func(*values)
-    raise ValueError('Unexpected type in draw_value: %s' % type(param))
+    raise ValueError("Unexpected type in draw_value: %s" % type(param))
 
 
 def to_tuple(shape):
@@ -424,15 +486,17 @@ def to_tuple(shape):
         return tuple()
     return tuple(np.atleast_1d(shape))
 
+
 def _is_one_d(dist_shape):
-    if hasattr(dist_shape, 'dshape') and dist_shape.dshape in ((), (0,), (1,)):
+    if hasattr(dist_shape, "dshape") and dist_shape.dshape in ((), (0,), (1,)):
         return True
-    elif hasattr(dist_shape, 'shape') and dist_shape.shape in ((), (0,), (1,)):
+    elif hasattr(dist_shape, "shape") and dist_shape.shape in ((), (0,), (1,)):
         return True
     elif to_tuple(dist_shape) == ():
         return True
     return False
 
+
 def generate_samples(generator, *args, **kwargs):
     """Generate samples from the distribution of a random variable.
 
@@ -462,10 +526,10 @@ def generate_samples(generator, *args, **kwargs):
 
     Any remaining *args and **kwargs are passed on to the generator function.
     """
-    dist_shape = kwargs.pop('dist_shape', ())
+    dist_shape = kwargs.pop("dist_shape", ())
     one_d = _is_one_d(dist_shape)
-    size = kwargs.pop('size', None)
-    broadcast_shape = kwargs.pop('broadcast_shape', None)
+    size = kwargs.pop("size", None)
+    broadcast_shape = kwargs.pop("broadcast_shape", None)
     if size is None:
         size = 1
 
@@ -481,8 +545,13 @@ def generate_samples(generator, *args, **kwargs):
             broadcast_shape = np.broadcast(*inputs).shape  # size of generator(size=1)
         except ValueError:  # sometimes happens if args have shape (500,) and (500, 4)
             max_dims = max(j.ndim for j in args + tuple(kwargs.values()))
-            args = tuple([j.reshape(j.shape + (1,) * (max_dims - j.ndim)) for j in args])
-            kwargs = {k: v.reshape(v.shape + (1,) * (max_dims - v.ndim)) for k, v in kwargs.items()}
+            args = tuple(
+                [j.reshape(j.shape + (1,) * (max_dims - j.ndim)) for j in args]
+            )
+            kwargs = {
+                k: v.reshape(v.shape + (1,) * (max_dims - v.ndim))
+                for k, v in kwargs.items()
+            }
             inputs = args + tuple(kwargs.values())
             broadcast_shape = np.broadcast(*inputs).shape  # size of generator(size=1)
 
@@ -494,7 +563,7 @@ def generate_samples(generator, *args, **kwargs):
     if broadcast_shape in {(), (0,), (1,)}:
         samples = generator(size=size_tup + dist_shape, *args, **kwargs)
     # Inputs already have the right shape. Just get the right size.
-    elif broadcast_shape[-len(dist_shape):] == dist_shape or len(dist_shape) == 0:
+    elif broadcast_shape[-len(dist_shape) :] == dist_shape or len(dist_shape) == 0:
         if size == 1 or (broadcast_shape == size_tup + dist_shape):
             samples = generator(size=broadcast_shape, *args, **kwargs)
         elif dist_shape == broadcast_shape:
@@ -502,26 +571,36 @@ def generate_samples(generator, *args, **kwargs):
         else:
             samples = None
     # Args have been broadcast correctly, can just ask for the right shape out
-    elif dist_shape[-len(broadcast_shape):] == broadcast_shape:
+    elif dist_shape[-len(broadcast_shape) :] == broadcast_shape:
         samples = generator(size=size_tup + dist_shape, *args, **kwargs)
     # Inputs have the right size, have to manually broadcast to the right dist_shape
-    elif broadcast_shape[:len(size_tup)] == size_tup:
-        suffix = broadcast_shape[len(size_tup):] + dist_shape
-        samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix, dtype=int))]
+    elif broadcast_shape[: len(size_tup)] == size_tup:
+        suffix = broadcast_shape[len(size_tup) :] + dist_shape
+        samples = [
+            generator(*args, **kwargs).reshape(size_tup + (1,))
+            for _ in range(np.prod(suffix, dtype=int))
+        ]
         samples = np.hstack(samples).reshape(size_tup + suffix)
     else:
         samples = None
 
     if samples is None:
-        raise TypeError('''Attempted to generate values with incompatible shapes:
+        raise TypeError(
+            """Attempted to generate values with incompatible shapes:
             size: {size}
             dist_shape: {dist_shape}
             broadcast_shape: {broadcast_shape}
-        '''.format(size=size, dist_shape=dist_shape, broadcast_shape=broadcast_shape))
+        """.format(
+                size=size, dist_shape=dist_shape, broadcast_shape=broadcast_shape
+            )
+        )
 
     # reshape samples here
     if samples.shape[0] == 1 and size == 1:
-        if len(samples.shape) > len(dist_shape) and samples.shape[-len(dist_shape):] == dist_shape:
+        if (
+            len(samples.shape) > len(dist_shape)
+            and samples.shape[-len(dist_shape) :] == dist_shape
+        ):
             samples = samples.reshape(samples.shape[1:])
 
     if one_d and samples.shape[-1] == 1:
diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index 38f34d6c0a..fd7a47936c 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -19,7 +19,7 @@ def all_discrete(comp_dists):
 
 
 class Mixture(Distribution):
-    R"""
+    r"""
     Mixture log-likelihood
 
     Often used to model subpopulation heterogeneity
@@ -69,23 +69,23 @@ class Mixture(Distribution):
     """
 
     def __init__(self, w, comp_dists, *args, **kwargs):
-        shape = kwargs.pop('shape', ())
+        shape = kwargs.pop("shape", ())
 
         self.w = w = tt.as_tensor_variable(w)
         self.comp_dists = comp_dists
 
-        defaults = kwargs.pop('defaults', [])
+        defaults = kwargs.pop("defaults", [])
 
         if all_discrete(comp_dists):
-            dtype = kwargs.pop('dtype', 'int64')
+            dtype = kwargs.pop("dtype", "int64")
         else:
-            dtype = kwargs.pop('dtype', 'float64')
+            dtype = kwargs.pop("dtype", "float64")
 
             try:
                 self.mean = (w * self._comp_means()).sum(axis=-1)
 
-                if 'mean' not in defaults:
-                    defaults.append('mean')
+                if "mean" not in defaults:
+                    defaults.append("mean")
             except AttributeError:
                 pass
 
@@ -94,13 +94,12 @@ def __init__(self, w, comp_dists, *args, **kwargs):
             comp_mode_logps = self.logp(comp_modes)
             self.mode = comp_modes[tt.argmax(w * comp_mode_logps, axis=-1)]
 
-            if 'mode' not in defaults:
-                defaults.append('mode')
+            if "mode" not in defaults:
+                defaults.append("mode")
         except (AttributeError, ValueError, IndexError):
             pass
 
-        super(Mixture, self).__init__(shape, dtype, defaults=defaults,
-                                      *args, **kwargs)
+        super(Mixture, self).__init__(shape, dtype, defaults=defaults, *args, **kwargs)
 
     def _comp_logp(self, value):
         comp_dists = self.comp_dists
@@ -110,41 +109,49 @@ def _comp_logp(self, value):
 
             return comp_dists.logp(value_)
         except AttributeError:
-            return tt.squeeze(tt.stack([comp_dist.logp(value)
-                                        for comp_dist in comp_dists],
-                                       axis=1))
+            return tt.squeeze(
+                tt.stack([comp_dist.logp(value) for comp_dist in comp_dists], axis=1)
+            )
 
     def _comp_means(self):
         try:
             return tt.as_tensor_variable(self.comp_dists.mean)
         except AttributeError:
-            return tt.squeeze(tt.stack([comp_dist.mean
-                                        for comp_dist in self.comp_dists],
-                                       axis=1))
+            return tt.squeeze(
+                tt.stack([comp_dist.mean for comp_dist in self.comp_dists], axis=1)
+            )
 
     def _comp_modes(self):
         try:
             return tt.as_tensor_variable(self.comp_dists.mode)
         except AttributeError:
-            return tt.squeeze(tt.stack([comp_dist.mode
-                                        for comp_dist in self.comp_dists],
-                                       axis=1))
+            return tt.squeeze(
+                tt.stack([comp_dist.mode for comp_dist in self.comp_dists], axis=1)
+            )
 
     def _comp_samples(self, point=None, size=None):
         try:
             samples = self.comp_dists.random(point=point, size=size)
         except AttributeError:
-            samples = np.column_stack([comp_dist.random(point=point, size=size)
-                                       for comp_dist in self.comp_dists])
+            samples = np.column_stack(
+                [
+                    comp_dist.random(point=point, size=size)
+                    for comp_dist in self.comp_dists
+                ]
+            )
 
         return np.squeeze(samples)
 
     def logp(self, value):
         w = self.w
 
-        return bound(logsumexp(tt.log(w) + self._comp_logp(value), axis=-1),
-                     w >= 0, w <= 1, tt.allclose(w.sum(axis=-1), 1),
-                     broadcast_conditions=False)
+        return bound(
+            logsumexp(tt.log(w) + self._comp_logp(value), axis=-1),
+            w >= 0,
+            w <= 1,
+            tt.allclose(w.sum(axis=-1), 1),
+            broadcast_conditions=False,
+        )
 
     def random(self, point=None, size=None):
         w = draw_values([self.w], point=point)[0]
@@ -157,26 +164,34 @@ def random(self, point=None, size=None):
         # Normalize inputs
         w /= w.sum(axis=-1, keepdims=True)
 
-        w_samples = generate_samples(random_choice,
-                                     p=w,
-                                     broadcast_shape=w.shape[:-1] or (1,),
-                                     dist_shape=distshape,
-                                     size=size).squeeze()
+        w_samples = generate_samples(
+            random_choice,
+            p=w,
+            broadcast_shape=w.shape[:-1] or (1,),
+            dist_shape=distshape,
+            size=size,
+        ).squeeze()
         if (size is None) or (distshape.size == 0):
             comp_samples = self._comp_samples(point=point, size=size)
             if comp_samples.ndim > 1:
-                samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples])
+                samples = np.squeeze(
+                    comp_samples[np.arange(w_samples.size), ..., w_samples]
+                )
             else:
                 samples = np.squeeze(comp_samples[w_samples])
         else:
             if w_samples.ndim == 1:
-                w_samples = np.reshape(np.tile(w_samples, size), (size,) + w_samples.shape)
-            samples = np.zeros((size,)+tuple(distshape))
+                w_samples = np.reshape(
+                    np.tile(w_samples, size), (size,) + w_samples.shape
+                )
+            samples = np.zeros((size,) + tuple(distshape))
             for i in range(size):
                 w_tmp = w_samples[i, :]
                 comp_tmp = self._comp_samples(point=point, size=None)
                 if comp_tmp.ndim > 1:
-                    samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp])
+                    samples[i, :] = np.squeeze(
+                        comp_tmp[np.arange(w_tmp.size), ..., w_tmp]
+                    )
                 else:
                     samples[i, :] = np.squeeze(comp_tmp[w_tmp])
 
@@ -184,7 +199,7 @@ def random(self, point=None, size=None):
 
 
 class NormalMixture(Mixture):
-    R"""
+    r"""
     Normal mixture log-likelihood
 
     .. math::
@@ -217,14 +232,14 @@ class NormalMixture(Mixture):
     """
 
     def __init__(self, w, mu, comp_shape=(), *args, **kwargs):
-        _, sd = get_tau_sd(tau=kwargs.pop('tau', None),
-                           sd=kwargs.pop('sd', None))
+        _, sd = get_tau_sd(tau=kwargs.pop("tau", None), sd=kwargs.pop("sd", None))
 
         self.mu = mu = tt.as_tensor_variable(mu)
         self.sd = sd = tt.as_tensor_variable(sd)
 
-        super(NormalMixture, self).__init__(w, Normal.dist(mu, sd=sd, shape=comp_shape),
-                                            *args, **kwargs)
+        super(NormalMixture, self).__init__(
+            w, Normal.dist(mu, sd=sd, shape=comp_shape), *args, **kwargs
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -232,8 +247,7 @@ def _repr_latex_(self, name=None, dist=None):
         mu = dist.mu
         w = dist.w
         sd = dist.sd
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{NormalMixture}}(\mathit{{w}}={},~\mathit{{mu}}={},~\mathit{{sigma}}={})$'.format(name,
-                                                get_variable_name(w),
-                                                get_variable_name(mu),
-                                                get_variable_name(sd))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{NormalMixture}}(\mathit{{w}}={},~\mathit{{mu}}={},~\mathit{{sigma}}={})$".format(
+            name, get_variable_name(w), get_variable_name(mu), get_variable_name(sd)
+        )
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 5c45d34f8b..d682a86427 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -24,15 +24,24 @@
 from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker
 
 
-__all__ = ['MvNormal', 'MvStudentT', 'Dirichlet',
-           'Multinomial', 'Wishart', 'WishartBartlett',
-           'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal',
-           'KroneckerNormal']
+__all__ = [
+    "MvNormal",
+    "MvStudentT",
+    "Dirichlet",
+    "Multinomial",
+    "Wishart",
+    "WishartBartlett",
+    "LKJCorr",
+    "LKJCholeskyCov",
+    "MatrixNormal",
+    "KroneckerNormal",
+]
 
 
 class _QuadFormBase(Continuous):
-    def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True,
-                 *args, **kwargs):
+    def __init__(
+        self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, **kwargs
+    ):
         super(_QuadFormBase, self).__init__(*args, **kwargs)
         if len(self.shape) > 2:
             raise ValueError("Only 1 or 2 dimensions are allowed.")
@@ -40,40 +49,42 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True,
         if chol is not None and not lower:
             chol = chol.T
         if len([i for i in [tau, cov, chol] if i is not None]) != 1:
-            raise ValueError('Incompatible parameterization. '
-                             'Specify exactly one of tau, cov, '
-                             'or chol.')
+            raise ValueError(
+                "Incompatible parameterization. "
+                "Specify exactly one of tau, cov, "
+                "or chol."
+            )
         self.mu = mu = tt.as_tensor_variable(mu)
         self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
         # Step methods and advi do not catch LinAlgErrors at the
         # moment. We work around that by using a cholesky op
         # that returns a nan as first entry instead of raising
         # an error.
-        cholesky = Cholesky(lower=True, on_error='nan')
+        cholesky = Cholesky(lower=True, on_error="nan")
 
         if cov is not None:
             self.k = cov.shape[0]
-            self._cov_type = 'cov'
+            self._cov_type = "cov"
             cov = tt.as_tensor_variable(cov)
             if cov.ndim != 2:
-                raise ValueError('cov must be two dimensional.')
+                raise ValueError("cov must be two dimensional.")
             self.chol_cov = cholesky(cov)
             self.cov = cov
             self._n = self.cov.shape[-1]
         elif tau is not None:
             self.k = tau.shape[0]
-            self._cov_type = 'tau'
+            self._cov_type = "tau"
             tau = tt.as_tensor_variable(tau)
             if tau.ndim != 2:
-                raise ValueError('tau must be two dimensional.')
+                raise ValueError("tau must be two dimensional.")
             self.chol_tau = cholesky(tau)
             self.tau = tau
             self._n = self.tau.shape[-1]
         else:
             self.k = chol.shape[0]
-            self._cov_type = 'chol'
+            self._cov_type = "chol"
             if chol.ndim != 2:
-                raise ValueError('chol must be two dimensional.')
+                raise ValueError("chol must be two dimensional.")
             self.chol_cov = tt.as_tensor_variable(chol)
             self._n = self.chol_cov.shape[-1]
 
@@ -81,7 +92,7 @@ def _quaddist(self, value):
         """Compute (x - mu).T @ Sigma^-1 @ (x - mu) and the logdet of Sigma."""
         mu = self.mu
         if value.ndim > 2 or value.ndim == 0:
-            raise ValueError('Invalid dimension for value: %s' % value.ndim)
+            raise ValueError("Invalid dimension for value: %s" % value.ndim)
         if value.ndim == 1:
             onedim = True
             value = value[None, :]
@@ -90,11 +101,11 @@ def _quaddist(self, value):
 
         delta = value - mu
 
-        if self._cov_type == 'cov':
+        if self._cov_type == "cov":
             # Use this when Theano#5908 is released.
             # return MvNormalLogp()(self.cov, delta)
             dist, logdet, ok = self._quaddist_cov(delta)
-        elif self._cov_type == 'tau':
+        elif self._cov_type == "tau":
             dist, logdet, ok = self._quaddist_tau(delta)
         else:
             dist, logdet, ok = self._quaddist_chol(delta)
@@ -140,19 +151,19 @@ def _quaddist_tau(self, delta):
     def _repr_cov_params(self, dist=None):
         if dist is None:
             dist = self
-        if self._cov_type == 'chol':
+        if self._cov_type == "chol":
             chol = get_variable_name(self.chol)
-            return r'\mathit{{chol}}={}'.format(chol)
-        elif self._cov_type == 'cov':
+            return r"\mathit{{chol}}={}".format(chol)
+        elif self._cov_type == "cov":
             cov = get_variable_name(self.cov)
-            return r'\mathit{{cov}}={}'.format(cov)
-        elif self._cov_type == 'tau':
+            return r"\mathit{{cov}}={}".format(cov)
+        elif self._cov_type == "tau":
             tau = get_variable_name(self.tau)
-            return r'\mathit{{tau}}={}'.format(tau)
+            return r"\mathit{{tau}}={}".format(tau)
 
 
 class MvNormal(_QuadFormBase):
-    R"""
+    r"""
     Multivariate normal log-likelihood.
 
     .. math::
@@ -218,10 +229,10 @@ class MvNormal(_QuadFormBase):
         vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T)
     """
 
-    def __init__(self, mu, cov=None, tau=None, chol=None, lower=True,
-                 *args, **kwargs):
-        super(MvNormal, self).__init__(mu=mu, cov=cov, tau=tau, chol=chol,
-                                       lower=lower, *args, **kwargs)
+    def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, *args, **kwargs):
+        super(MvNormal, self).__init__(
+            mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs
+        )
         self.mean = self.median = self.mode = self.mu = self.mu
 
     def random(self, point=None, size=None):
@@ -233,19 +244,18 @@ def random(self, point=None, size=None):
             except TypeError:
                 size = [size]
 
-        if self._cov_type == 'cov':
+        if self._cov_type == "cov":
             mu, cov = draw_values([self.mu, self.cov], point=point, size=size)
             if mu.shape[-1] != cov.shape[-1]:
                 raise ValueError("Shapes for mu and cov don't match")
 
             try:
-                dist = stats.multivariate_normal(
-                    mean=mu, cov=cov, allow_singular=True)
+                dist = stats.multivariate_normal(mean=mu, cov=cov, allow_singular=True)
             except ValueError:
                 size.append(mu.shape[-1])
                 return np.nan * np.zeros(size)
             return dist.rvs(size)
-        elif self._cov_type == 'chol':
+        elif self._cov_type == "chol":
             mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size)
             if mu.shape[-1] != chol[0].shape[-1]:
                 raise ValueError("Shapes for mu and chol don't match")
@@ -265,14 +275,13 @@ def random(self, point=None, size=None):
                 return np.nan * np.zeros(size)
 
             standard_normal = np.random.standard_normal(size)
-            transformed = linalg.solve_triangular(
-                chol, standard_normal.T, lower=True)
+            transformed = linalg.solve_triangular(chol, standard_normal.T, lower=True)
             return mu + transformed.T
 
     def logp(self, value):
         quaddist, logdet, ok = self._quaddist(value)
         k = value.shape[-1].astype(theano.config.floatX)
-        norm = - 0.5 * k * pm.floatX(np.log(2 * np.pi))
+        norm = -0.5 * k * pm.floatX(np.log(2 * np.pi))
         return bound(norm - 0.5 * quaddist - logdet, ok)
 
     def _repr_latex_(self, name=None, dist=None):
@@ -280,13 +289,13 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         mu = dist.mu
         name_mu = get_variable_name(mu)
-        return (r'${} \sim \text{{MvNormal}}'
-                r'(\mathit{{mu}}={}, {})$'
-                .format(name, name_mu, self._repr_cov_params(dist)))
+        return r"${} \sim \text{{MvNormal}}" r"(\mathit{{mu}}={}, {})$".format(
+            name, name_mu, self._repr_cov_params(dist)
+        )
 
 
 class MvStudentT(_QuadFormBase):
-    R"""
+    r"""
     Multivariate Student-T log-likelihood.
 
     .. math::
@@ -326,23 +335,34 @@ class MvStudentT(_QuadFormBase):
         Whether the cholesky fatcor is given as a lower triangular matrix.
     """
 
-    def __init__(self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None,
-                 lower=True, *args, **kwargs):
+    def __init__(
+        self,
+        nu,
+        Sigma=None,
+        mu=None,
+        cov=None,
+        tau=None,
+        chol=None,
+        lower=True,
+        *args,
+        **kwargs
+    ):
         if Sigma is not None:
             if cov is not None:
-                raise ValueError('Specify only one of cov and Sigma')
+                raise ValueError("Specify only one of cov and Sigma")
             cov = Sigma
-        super(MvStudentT, self).__init__(mu=mu, cov=cov, tau=tau, chol=chol,
-                                         lower=lower, *args, **kwargs)
+        super(MvStudentT, self).__init__(
+            mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs
+        )
         self.nu = nu = tt.as_tensor_variable(nu)
         self.mean = self.median = self.mode = self.mu = self.mu
 
     def random(self, point=None, size=None):
         nu, mu = draw_values([self.nu, self.mu], point=point, size=size)
-        if self._cov_type == 'cov':
+        if self._cov_type == "cov":
             cov, = draw_values([self.cov], point=point, size=size)
             dist = MvNormal.dist(mu=np.zeros_like(mu), cov=cov)
-        elif self._cov_type == 'tau':
+        elif self._cov_type == "tau":
             tau, = draw_values([self.tau], point=point, size=size)
             dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau)
         else:
@@ -358,10 +378,12 @@ def logp(self, value):
         quaddist, logdet, ok = self._quaddist(value)
         k = value.shape[-1].astype(theano.config.floatX)
 
-        norm = (gammaln((self.nu + k) / 2.)
-                - gammaln(self.nu / 2.)
-                - 0.5 * k * floatX(np.log(self.nu * np.pi)))
-        inner = - (self.nu + k) / 2. * tt.log1p(quaddist / self.nu)
+        norm = (
+            gammaln((self.nu + k) / 2.0)
+            - gammaln(self.nu / 2.0)
+            - 0.5 * k * floatX(np.log(self.nu * np.pi))
+        )
+        inner = -(self.nu + k) / 2.0 * tt.log1p(quaddist / self.nu)
         return bound(norm + inner - logdet, ok)
 
     def _repr_latex_(self, name=None, dist=None):
@@ -371,14 +393,15 @@ def _repr_latex_(self, name=None, dist=None):
         nu = dist.nu
         name_nu = get_variable_name(nu)
         name_mu = get_variable_name(mu)
-        return (r'${} \sim \text{{MvStudentT}}'
-                r'(\mathit{{nu}}={}, \mathit{{mu}}={}, '
-                r'{})$'
-                .format(name, name_nu, name_mu, self._repr_cov_params(dist)))
+        return (
+            r"${} \sim \text{{MvStudentT}}"
+            r"(\mathit{{nu}}={}, \mathit{{mu}}={}, "
+            r"{})$".format(name, name_nu, name_mu, self._repr_cov_params(dist))
+        )
 
 
 class Dirichlet(Continuous):
-    R"""
+    r"""
     Dirichlet log-likelihood.
 
     .. math::
@@ -401,8 +424,7 @@ class Dirichlet(Continuous):
         Concentration parameters (a > 0).
     """
 
-    def __init__(self, a, transform=transforms.stick_breaking,
-                 *args, **kwargs):
+    def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs):
         shape = np.atleast_1d(a.shape)[-1]
 
         kwargs.setdefault("shape", shape)
@@ -413,15 +435,13 @@ def __init__(self, a, transform=transforms.stick_breaking,
         self.a = a = tt.as_tensor_variable(a)
         self.mean = a / tt.sum(a)
 
-        self.mode = tt.switch(tt.all(a > 1),
-                              (a - 1) / tt.sum(a - 1),
-                              np.nan)
+        self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan)
 
     def _random(self, a, size=None):
         gen = stats.dirichlet.rvs
         shape = tuple(np.atleast_1d(self.shape))
-        if size[-len(shape):] == shape:
-            real_size = size[:-len(shape)]
+        if size[-len(shape) :] == shape:
+            real_size = size[: -len(shape)]
         else:
             real_size = size
         if self.size_prefix:
@@ -440,10 +460,7 @@ def _random(self, a, size=None):
 
     def random(self, point=None, size=None):
         a = draw_values([self.a], point=point, size=size)[0]
-        samples = generate_samples(self._random,
-                                   a=a,
-                                   dist_shape=self.shape,
-                                   size=size)
+        samples = generate_samples(self._random, a=a, dist_shape=self.shape, size=size)
         return samples
 
     def logp(self, value):
@@ -451,22 +468,27 @@ def logp(self, value):
         a = self.a
 
         # only defined for sum(value) == 1
-        return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1)
-                     + gammaln(tt.sum(a, axis=-1)),
-                     tt.all(value >= 0), tt.all(value <= 1),
-                     k > 1, tt.all(a > 0),
-                     broadcast_conditions=False)
+        return bound(
+            tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1)
+            + gammaln(tt.sum(a, axis=-1)),
+            tt.all(value >= 0),
+            tt.all(value <= 1),
+            k > 1,
+            tt.all(a > 0),
+            broadcast_conditions=False,
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         a = dist.a
-        return r'${} \sim \text{{Dirichlet}}(\mathit{{a}}={})$'.format(name,
-                                                get_variable_name(a))
+        return r"${} \sim \text{{Dirichlet}}(\mathit{{a}}={})$".format(
+            name, get_variable_name(a)
+        )
 
 
 class Multinomial(Discrete):
-    R"""
+    r"""
     Multinomial log-likelihood.
 
     Generalizes binomial distribution, but instead of each trial resulting
@@ -502,7 +524,7 @@ def __init__(self, n, p, *args, **kwargs):
         super(Multinomial, self).__init__(*args, **kwargs)
 
         p = p / tt.sum(p, axis=-1, keepdims=True)
-        n = np.squeeze(n) # works also if n is a tensor
+        n = np.squeeze(n)  # works also if n is a tensor
 
         if len(self.shape) > 1:
             m = self.shape[-2]
@@ -521,22 +543,23 @@ def __init__(self, n, p, *args, **kwargs):
             self.p = tt.as_tensor_variable(p)
 
         self.mean = self.n * self.p
-        mode = tt.cast(tt.round(self.mean), 'int32')
+        mode = tt.cast(tt.round(self.mean), "int32")
         diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
         inc_bool_arr = tt.abs_(diff) > 0
-        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
-                                diff[inc_bool_arr.nonzero()])
+        mode = tt.inc_subtensor(
+            mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]
+        )
         self.mode = mode
 
     def _random(self, n, p, size=None):
         original_dtype = p.dtype
         # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
-        p = p.astype('float64')
+        p = p.astype("float64")
         # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals
         if size == p.shape:
             size = None
-        elif size[-len(p.shape):] == p.shape:
-            size = size[:len(size) - len(p.shape)]
+        elif size[-len(p.shape) :] == p.shape:
+            size = size[: len(size) - len(p.shape)]
 
         n_dim = n.squeeze().ndim
 
@@ -545,32 +568,27 @@ def _random(self, n, p, size=None):
             randnum = np.random.multinomial(n, p.squeeze(), size=size)
         elif (n_dim == 0) and (p.ndim > 1):
             p = p / p.sum(axis=1, keepdims=True)
-            randnum = np.asarray([
-                np.random.multinomial(n.squeeze(), pp, size=size)
-                for pp in p
-            ])
+            randnum = np.asarray(
+                [np.random.multinomial(n.squeeze(), pp, size=size) for pp in p]
+            )
             randnum = np.moveaxis(randnum, 1, 0)
         elif (n_dim > 0) and (p.ndim == 1):
             p = p / p.sum()
-            randnum = np.asarray([
-                np.random.multinomial(nn, p.squeeze(), size=size)
-                for nn in n
-            ])
+            randnum = np.asarray(
+                [np.random.multinomial(nn, p.squeeze(), size=size) for nn in n]
+            )
             randnum = np.moveaxis(randnum, 1, 0)
         else:
             p = p / p.sum(axis=1, keepdims=True)
-            randnum = np.asarray([
-                np.random.multinomial(nn, pp, size=size)
-                for (nn, pp) in zip(n, p)
-            ])
+            randnum = np.asarray(
+                [np.random.multinomial(nn, pp, size=size) for (nn, pp) in zip(n, p)]
+            )
             randnum = np.moveaxis(randnum, 1, 0)
         return randnum.astype(original_dtype)
 
     def random(self, point=None, size=None):
         n, p = draw_values([self.n, self.p], point=point, size=size)
-        samples = generate_samples(self._random, n, p,
-                                   dist_shape=self.shape,
-                                   size=size)
+        samples = generate_samples(self._random, n, p, dist_shape=self.shape, size=size)
         return samples
 
     def logp(self, x):
@@ -584,7 +602,7 @@ def logp(self, x):
             tt.all(p <= 1),
             tt.all(tt.eq(tt.sum(p, axis=-1), 1)),
             tt.all(tt.ge(n, 0)),
-            broadcast_conditions=False
+            broadcast_conditions=False,
         )
 
     def _repr_latex_(self, name=None, dist=None):
@@ -592,9 +610,9 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         n = dist.n
         p = dist.p
-        return r'${} \sim \text{{Multinomial}}(\mathit{{n}}={}, \mathit{{p}}={})$'.format(name,
-                                                get_variable_name(n),
-                                                get_variable_name(p))
+        return r"${} \sim \text{{Multinomial}}(\mathit{{n}}={}, \mathit{{p}}={})$".format(
+            name, get_variable_name(n), get_variable_name(p)
+        )
 
 
 def posdef(AA):
@@ -619,7 +637,7 @@ class PosDefMatrix(theano.Op):
     def make_node(self, x):
         x = tt.as_tensor_variable(x)
         assert x.ndim == 2
-        o = tt.TensorType(dtype='int8', broadcastable=[])()
+        o = tt.TensorType(dtype="int8", broadcastable=[])()
         return theano.Apply(self, [x], [o])
 
     # Python implementation:
@@ -628,9 +646,9 @@ def perform(self, node, inputs, outputs):
         (x,) = inputs
         (z,) = outputs
         try:
-            z[0] = np.array(posdef(x), dtype='int8')
+            z[0] = np.array(posdef(x), dtype="int8")
         except Exception:
-            pm._log.exception('Failed to check if %s positive definite', x)
+            pm._log.exception("Failed to check if %s positive definite", x)
             raise
 
     def infer_shape(self, node, shapes):
@@ -643,11 +661,12 @@ def grad(self, inp, grads):
     def __str__(self):
         return "MatrixIsPositiveDefinite"
 
+
 matrix_pos_def = PosDefMatrix()
 
 
 class Wishart(Continuous):
-    R"""
+    r"""
     Wishart log-likelihood.
 
     The Wishart distribution is the probability distribution of the
@@ -685,26 +704,27 @@ class Wishart(Continuous):
 
     def __init__(self, nu, V, *args, **kwargs):
         super(Wishart, self).__init__(*args, **kwargs)
-        warnings.warn('The Wishart distribution can currently not be used '
-                      'for MCMC sampling. The probability of sampling a '
-                      'symmetric matrix is basically zero. Instead, please '
-                      'use LKJCholeskyCov or LKJCorr. For more information '
-                      'on the issues surrounding the Wishart see here: '
-                      'https://github.com/pymc-devs/pymc3/issues/538.',
-                      UserWarning)
+        warnings.warn(
+            "The Wishart distribution can currently not be used "
+            "for MCMC sampling. The probability of sampling a "
+            "symmetric matrix is basically zero. Instead, please "
+            "use LKJCholeskyCov or LKJCorr. For more information "
+            "on the issues surrounding the Wishart see here: "
+            "https://github.com/pymc-devs/pymc3/issues/538.",
+            UserWarning,
+        )
         self.nu = nu = tt.as_tensor_variable(nu)
         self.p = p = tt.as_tensor_variable(V.shape[0])
         self.V = V = tt.as_tensor_variable(V)
         self.mean = nu * V
-        self.mode = tt.switch(tt.ge(nu, p + 1),
-                              (nu - p - 1) * V,
-                              np.nan)
+        self.mode = tt.switch(tt.ge(nu, p + 1), (nu - p - 1) * V, np.nan)
 
     def random(self, point=None, size=None):
         nu, V = draw_values([self.nu, self.V], point=point, size=size)
-        size= 1 if size is None else size
-        return generate_samples(stats.wishart.rvs, np.asscalar(nu), V,
-                                    broadcast_shape=(size,))
+        size = 1 if size is None else size
+        return generate_samples(
+            stats.wishart.rvs, np.asscalar(nu), V, broadcast_shape=(size,)
+        )
 
     def logp(self, X):
         nu = self.nu
@@ -714,14 +734,19 @@ def logp(self, X):
         IVI = det(V)
         IXI = det(X)
 
-        return bound(((nu - p - 1) * tt.log(IXI)
-                      - trace(matrix_inverse(V).dot(X))
-                      - nu * p * tt.log(2) - nu * tt.log(IVI)
-                      - 2 * multigammaln(nu / 2., p)) / 2,
-                     matrix_pos_def(X),
-                     tt.eq(X, X.T),
-                     nu > (p - 1),
-                     broadcast_conditions=False
+        return bound(
+            (
+                (nu - p - 1) * tt.log(IXI)
+                - trace(matrix_inverse(V).dot(X))
+                - nu * p * tt.log(2)
+                - nu * tt.log(IVI)
+                - 2 * multigammaln(nu / 2.0, p)
+            )
+            / 2,
+            matrix_pos_def(X),
+            tt.eq(X, X.T),
+            nu > (p - 1),
+            broadcast_conditions=False,
         )
 
     def _repr_latex_(self, name=None, dist=None):
@@ -729,12 +754,15 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         nu = dist.nu
         V = dist.V
-        return r'${} \sim \text{{Wishart}}(\mathit{{nu}}={}, \mathit{{V}}={})$'.format(name,
-                                                get_variable_name(nu),
-                                                get_variable_name(V))
+        return r"${} \sim \text{{Wishart}}(\mathit{{nu}}={}, \mathit{{V}}={})$".format(
+            name, get_variable_name(nu), get_variable_name(V)
+        )
 
-def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testval=None):
-    R"""
+
+def WishartBartlett(
+    name, S, nu, is_cholesky=False, return_cholesky=False, testval=None
+):
+    r"""
     Bartlett decomposition of the Wishart distribution. As the Wishart
     distribution requires the matrix to be symmetric positive semi-definite
     it is impossible for MCMC to ever propose acceptable matrices.
@@ -791,17 +819,20 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv
         # Inverse transform
         testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T))
         testval = linalg.cholesky(testval, lower=True)
-        diag_testval = testval[diag_idx]**2
+        diag_testval = testval[diag_idx] ** 2
         tril_testval = testval[tril_idx]
     else:
         diag_testval = None
         tril_testval = None
 
-    c = tt.sqrt(ChiSquared('c', nu - np.arange(2, 2 + n_diag), shape=n_diag,
-                           testval=diag_testval))
-    pm._log.info('Added new variable c to model diagonal of Wishart.')
-    z = Normal('z', 0., 1., shape=n_tril, testval=tril_testval)
-    pm._log.info('Added new variable z to model off-diagonals of Wishart.')
+    c = tt.sqrt(
+        ChiSquared(
+            "c", nu - np.arange(2, 2 + n_diag), shape=n_diag, testval=diag_testval
+        )
+    )
+    pm._log.info("Added new variable c to model diagonal of Wishart.")
+    z = Normal("z", 0.0, 1.0, shape=n_tril, testval=tril_testval)
+    pm._log.info("Added new variable z to model off-diagonals of Wishart.")
     # Construct A matrix
     A = tt.zeros(S.shape, dtype=np.float32)
     A = tt.set_subtensor(A[diag_idx], c)
@@ -816,25 +847,29 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv
 
 def _lkj_normalizing_constant(eta, n):
     if eta == 1:
-        result = gammaln(2. * tt.arange(1, int((n - 1) / 2) + 1)).sum()
+        result = gammaln(2.0 * tt.arange(1, int((n - 1) / 2) + 1)).sum()
         if n % 2 == 1:
-            result += (0.25 * (n ** 2 - 1) * tt.log(np.pi)
-                       - 0.25 * (n - 1) ** 2 * tt.log(2.)
-                       - (n - 1) * gammaln(int((n + 1) / 2)))
+            result += (
+                0.25 * (n ** 2 - 1) * tt.log(np.pi)
+                - 0.25 * (n - 1) ** 2 * tt.log(2.0)
+                - (n - 1) * gammaln(int((n + 1) / 2))
+            )
         else:
-            result += (0.25 * n * (n - 2) * tt.log(np.pi)
-                       + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.)
-                       + n * gammaln(n / 2) - (n - 1) * gammaln(n))
+            result += (
+                0.25 * n * (n - 2) * tt.log(np.pi)
+                + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.0)
+                + n * gammaln(n / 2)
+                - (n - 1) * gammaln(n)
+            )
     else:
         result = -(n - 1) * gammaln(eta + 0.5 * (n - 1))
         k = tt.arange(1, n)
-        result += (0.5 * k * tt.log(np.pi)
-                   + gammaln(eta + 0.5 * (n - 1 - k))).sum()
+        result += (0.5 * k * tt.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum()
     return result
 
 
 class LKJCholeskyCov(Continuous):
-    R"""Covariance matrix with LKJ distributed correlations.
+    r"""Covariance matrix with LKJ distributed correlations.
 
     This defines a distribution over cholesky decomposed covariance
     matrices, such that the underlying correlation matrices follow an
@@ -941,24 +976,25 @@ class LKJCholeskyCov(Continuous):
        determinant, URL (version: 2012-04-14):
        http://math.stackexchange.com/q/130026
     """
+
     def __init__(self, eta, n, sd_dist, *args, **kwargs):
         self.n = n
         self.eta = eta
 
-        if 'transform' in kwargs:
-            raise ValueError('Invalid parameter: transform.')
-        if 'shape' in kwargs:
-            raise ValueError('Invalid parameter: shape.')
+        if "transform" in kwargs:
+            raise ValueError("Invalid parameter: transform.")
+        if "shape" in kwargs:
+            raise ValueError("Invalid parameter: shape.")
 
         shape = n * (n + 1) // 2
 
         if sd_dist.shape.ndim not in [0, 1]:
-            raise ValueError('Invalid shape for sd_dist.')
+            raise ValueError("Invalid shape for sd_dist.")
 
         transform = transforms.CholeskyCovPacked(n)
 
-        kwargs['shape'] = shape
-        kwargs['transform'] = transform
+        kwargs["shape"] = shape
+        kwargs["transform"] = transform
         super(LKJCholeskyCov, self).__init__(*args, **kwargs)
 
         self.sd_dist = sd_dist
@@ -976,8 +1012,8 @@ def logp(self, x):
         variance = tt.zeros(n)
         variance = tt.inc_subtensor(variance[0], x[0] ** 2)
         variance = tt.inc_subtensor(
-            variance[1:],
-            cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]])
+            variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]
+        )
         sd_vals = tt.sqrt(variance)
 
         logp_sd = self.sd_dist.logp(sd_vals).sum()
@@ -998,7 +1034,7 @@ def logp(self, x):
 
 
 class LKJCorr(Continuous):
-    R"""
+    r"""
     The LKJ (Lewandowski, Kurowicka and Joe) log-likelihood.
 
     The LKJ distribution is a prior distribution for correlation matrices.
@@ -1039,12 +1075,14 @@ class LKJCorr(Continuous):
         100(9), pp.1989-2001.
     """
 
-    def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwargs):
+    def __init__(self, eta=None, n=None, p=None, transform="interval", *args, **kwargs):
         if (p is not None) and (n is not None) and (eta is None):
-            warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta '
-                          'dimension parameter p -> n. Please update your code. '
-                          'Automatically re-assigning parameters for backwards compatibility.',
-                          DeprecationWarning)
+            warnings.warn(
+                "Parameters to LKJCorr have changed: shape parameter n -> eta "
+                "dimension parameter p -> n. Please update your code. "
+                "Automatically re-assigning parameters for backwards compatibility.",
+                DeprecationWarning,
+            )
             self.n = p
             self.eta = n
             eta = self.eta
@@ -1053,21 +1091,24 @@ def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwar
             self.n = n
             self.eta = eta
         else:
-            raise ValueError('Invalid parameter: please use eta as the shape parameter and '
-                             'n as the dimension parameter.')
+            raise ValueError(
+                "Invalid parameter: please use eta as the shape parameter and "
+                "n as the dimension parameter."
+            )
 
         shape = n * (n - 1) // 2
         self.mean = floatX(np.zeros(shape))
 
-        if transform == 'interval':
+        if transform == "interval":
             transform = transforms.interval(-1, 1)
 
-        super(LKJCorr, self).__init__(shape=shape, transform=transform,
-                                      *args, **kwargs)
-        warnings.warn('Parameters in LKJCorr have been rename: shape parameter n -> eta '
-                      'dimension parameter p -> n. Please double check your initialization.',
-                      DeprecationWarning)
-        self.tri_index = np.zeros([n, n], dtype='int32')
+        super(LKJCorr, self).__init__(shape=shape, transform=transform, *args, **kwargs)
+        warnings.warn(
+            "Parameters in LKJCorr have been rename: shape parameter n -> eta "
+            "dimension parameter p -> n. Please double check your initialization.",
+            DeprecationWarning,
+        )
+        self.tri_index = np.zeros([n, n], dtype="int32")
         self.tri_index[np.triu_indices(n, k=1)] = np.arange(shape)
         self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(shape)
 
@@ -1075,28 +1116,27 @@ def _random(self, n, eta, size=None):
         size = size if isinstance(size, tuple) else (size,)
         # original implementation in R see:
         # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r
-        beta = eta - 1 + n/2
+        beta = eta - 1 + n / 2
         r12 = 2 * stats.beta.rvs(a=beta, b=beta, size=size) - 1
         P = np.eye(n)[:, :, np.newaxis] * np.ones(size)
         P[0, 1] = r12
-        P[1, 1] = np.sqrt(1 - r12**2)
+        P[1, 1] = np.sqrt(1 - r12 ** 2)
         if n > 2:
-            for m in range(1, n-1):
+            for m in range(1, n - 1):
                 beta -= 0.5
-                y = stats.beta.rvs(a=(m+1) / 2., b=beta, size=size)
-                z = stats.norm.rvs(loc=0, scale=1, size=(m+1, ) + size)
-                z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
-                P[0:m+1, m+1] = np.sqrt(y) * z
-                P[m+1, m+1] = np.sqrt(1 - y)
-        Pt = np.transpose(P, (2, 0 ,1))
-        C = np.einsum('...ji,...jk->...ik', Pt, Pt)
+                y = stats.beta.rvs(a=(m + 1) / 2.0, b=beta, size=size)
+                z = stats.norm.rvs(loc=0, scale=1, size=(m + 1,) + size)
+                z = z / np.sqrt(np.einsum("ij,ij->j", z, z))
+                P[0 : m + 1, m + 1] = np.sqrt(y) * z
+                P[m + 1, m + 1] = np.sqrt(1 - y)
+        Pt = np.transpose(P, (2, 0, 1))
+        C = np.einsum("...ji,...jk->...ik", Pt, Pt)
         return C.transpose((1, 2, 0))[np.triu_indices(n, k=1)].T
 
     def random(self, point=None, size=None):
         n, eta = draw_values([self.n, self.eta], point=point, size=size)
-        size= 1 if size is None else size
-        samples = generate_samples(self._random, n, eta,
-                                   broadcast_shape=(size,))
+        size = 1 if size is None else size
+        samples = generate_samples(self._random, n, eta, broadcast_shape=(size,))
         return samples
 
     def logp(self, x):
@@ -1107,17 +1147,19 @@ def logp(self, x):
         X = tt.fill_diagonal(X, 1)
 
         result = _lkj_normalizing_constant(eta, n)
-        result += (eta - 1.) * tt.log(det(X))
-        return bound(result,
-                     tt.all(X <= 1), tt.all(X >= -1),
-                     matrix_pos_def(X),
-                     eta > 0,
-                     broadcast_conditions=False
+        result += (eta - 1.0) * tt.log(det(X))
+        return bound(
+            result,
+            tt.all(X <= 1),
+            tt.all(X >= -1),
+            matrix_pos_def(X),
+            eta > 0,
+            broadcast_conditions=False,
         )
 
 
 class MatrixNormal(Continuous):
-    R"""
+    r"""
     Matrix-valued normal log-likelihood.
 
     .. math::
@@ -1205,12 +1247,22 @@ class MatrixNormal(Continuous):
                                    observed=data, shape=(m, n))
     """
 
-    def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None,
-                 colcov=None, colchol=None, coltau=None, shape=None, *args,
-                 **kwargs):
+    def __init__(
+        self,
+        mu=0,
+        rowcov=None,
+        rowchol=None,
+        rowtau=None,
+        colcov=None,
+        colchol=None,
+        coltau=None,
+        shape=None,
+        *args,
+        **kwargs
+    ):
         self._setup_matrices(colcov, colchol, coltau, rowcov, rowchol, rowtau)
         if shape is None:
-            raise TypeError('shape is a required argument')
+            raise TypeError("shape is a required argument")
         assert len(shape) == 2, "shape must have length 2: mxn"
         self.shape = shape
         super(MatrixNormal, self).__init__(shape=shape, *args, **kwargs)
@@ -1220,93 +1272,106 @@ def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None,
         self.solve_upper = tt.slinalg.solve_upper_triangular
 
     def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
-        cholesky = Cholesky(lower=True, on_error='raise')
+        cholesky = Cholesky(lower=True, on_error="raise")
 
         # Among-row matrices
         if len([i for i in [rowtau, rowcov, rowchol] if i is not None]) != 1:
-            raise ValueError('Incompatible parameterization. '
-                             'Specify exactly one of rowtau, rowcov, '
-                             'or rowchol.')
+            raise ValueError(
+                "Incompatible parameterization. "
+                "Specify exactly one of rowtau, rowcov, "
+                "or rowchol."
+            )
         if rowcov is not None:
             self.m = rowcov.shape[0]
-            self._rowcov_type = 'cov'
+            self._rowcov_type = "cov"
             rowcov = tt.as_tensor_variable(rowcov)
             if rowcov.ndim != 2:
-                raise ValueError('rowcov must be two dimensional.')
+                raise ValueError("rowcov must be two dimensional.")
             self.rowchol_cov = cholesky(rowcov)
             self.rowcov = rowcov
         elif rowtau is not None:
-            raise ValueError('rowtau not supported at this time')
+            raise ValueError("rowtau not supported at this time")
             self.m = rowtau.shape[0]
-            self._rowcov_type = 'tau'
+            self._rowcov_type = "tau"
             rowtau = tt.as_tensor_variable(rowtau)
             if rowtau.ndim != 2:
-                raise ValueError('rowtau must be two dimensional.')
+                raise ValueError("rowtau must be two dimensional.")
             self.rowchol_tau = cholesky(rowtau)
             self.rowtau = rowtau
         else:
             self.m = rowchol.shape[0]
-            self._rowcov_type = 'chol'
+            self._rowcov_type = "chol"
             if rowchol.ndim != 2:
-                raise ValueError('rowchol must be two dimensional.')
+                raise ValueError("rowchol must be two dimensional.")
             self.rowchol_cov = tt.as_tensor_variable(rowchol)
 
         # Among-column matrices
         if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1:
-            raise ValueError('Incompatible parameterization. '
-                             'Specify exactly one of coltau, colcov, '
-                             'or colchol.')
+            raise ValueError(
+                "Incompatible parameterization. "
+                "Specify exactly one of coltau, colcov, "
+                "or colchol."
+            )
         if colcov is not None:
             self.n = colcov.shape[0]
-            self._colcov_type = 'cov'
+            self._colcov_type = "cov"
             colcov = tt.as_tensor_variable(colcov)
             if colcov.ndim != 2:
-                raise ValueError('colcov must be two dimensional.')
+                raise ValueError("colcov must be two dimensional.")
             self.colchol_cov = cholesky(colcov)
             self.colcov = colcov
         elif coltau is not None:
-            raise ValueError('coltau not supported at this time')
+            raise ValueError("coltau not supported at this time")
             self.n = coltau.shape[0]
-            self._colcov_type = 'tau'
+            self._colcov_type = "tau"
             coltau = tt.as_tensor_variable(coltau)
             if coltau.ndim != 2:
-                raise ValueError('coltau must be two dimensional.')
+                raise ValueError("coltau must be two dimensional.")
             self.colchol_tau = cholesky(coltau)
             self.coltau = coltau
         else:
             self.n = colchol.shape[0]
-            self._colcov_type = 'chol'
+            self._colcov_type = "chol"
             if colchol.ndim != 2:
-                raise ValueError('colchol must be two dimensional.')
+                raise ValueError("colchol must be two dimensional.")
             self.colchol_cov = tt.as_tensor_variable(colchol)
 
     def random(self, point=None, size=None):
         mu, colchol, rowchol = draw_values(
-                                [self.mu, self.colchol_cov, self.rowchol_cov],
-                                point=point,
-                                size=size)
+            [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size
+        )
         if size is None:
             size = ()
         if size in (None, ()):
-            standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1]))
+            standard_normal = np.random.standard_normal(
+                (self.shape[0], colchol.shape[-1])
+            )
             samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))
         else:
             samples = []
             size = tuple(np.atleast_1d(size))
             if mu.shape == tuple(self.shape):
                 for _ in range(np.prod(size)):
-                    standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1]))
-                    samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T)))
+                    standard_normal = np.random.standard_normal(
+                        (self.shape[0], colchol.shape[-1])
+                    )
+                    samples.append(
+                        mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))
+                    )
             else:
                 for j in range(np.prod(size)):
-                    standard_normal = np.random.standard_normal((self.shape[0], colchol[j].shape[-1]))
-                    samples.append(mu[j] +
-                                np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T)))
+                    standard_normal = np.random.standard_normal(
+                        (self.shape[0], colchol[j].shape[-1])
+                    )
+                    samples.append(
+                        mu[j]
+                        + np.matmul(
+                            rowchol[j], np.matmul(standard_normal, colchol[j].T)
+                        )
+                    )
             samples = np.array(samples).reshape(size + tuple(self.shape))
         return samples
 
-
-
     def _trquaddist(self, value):
         """Compute Tr[colcov^-1 @ (x - mu).T @ rowcov^-1 @ (x - mu)] and
         the logdet of colcov and rowcov."""
@@ -1332,12 +1397,12 @@ def logp(self, value):
         trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value)
         m = self.m
         n = self.n
-        norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi))
-        return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet
+        norm = -0.5 * m * n * pm.floatX(np.log(2 * np.pi))
+        return norm - 0.5 * trquaddist - m * half_collogdet - n * half_rowlogdet
 
 
 class KroneckerNormal(Continuous):
-    R"""
+    r"""
     Multivariate normal log-likelihood with Kronecker-structured covariance.
 
     .. math::
@@ -1425,24 +1490,27 @@ class KroneckerNormal(Continuous):
     .. [1] Saatchi, Y. (2011). "Scalable inference for structured Gaussian process models"
     """
 
-    def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None,
-                 *args, **kwargs):
+    def __init__(
+        self, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs
+    ):
         self._setup(covs, chols, evds, sigma)
         super(KroneckerNormal, self).__init__(*args, **kwargs)
         self.mu = tt.as_tensor_variable(mu)
         self.mean = self.median = self.mode = self.mu
 
     def _setup(self, covs, chols, evds, sigma):
-        self.cholesky = Cholesky(lower=True, on_error='raise')
+        self.cholesky = Cholesky(lower=True, on_error="raise")
         if len([i for i in [covs, chols, evds] if i is not None]) != 1:
-            raise ValueError('Incompatible parameterization. '
-                             'Specify exactly one of covs, chols, '
-                             'or evds.')
+            raise ValueError(
+                "Incompatible parameterization. "
+                "Specify exactly one of covs, chols, "
+                "or evds."
+            )
         self._isEVD = False
         self.sigma = sigma
         self.is_noisy = self.sigma is not None and self.sigma != 0
         if covs is not None:
-            self._cov_type = 'cov'
+            self._cov_type = "cov"
             self.covs = covs
             if self.is_noisy:
                 # Noise requires eigendecomposition
@@ -1453,10 +1521,11 @@ def _setup(self, covs, chols, evds, sigma):
                 self.chols = list(map(self.cholesky, self.covs))
                 self.chol_diags = list(map(tt.nlinalg.diag, self.chols))
                 self.sizes = tt.as_tensor_variable(
-                                [chol.shape[0] for chol in self.chols])
+                    [chol.shape[0] for chol in self.chols]
+                )
                 self.N = tt.prod(self.sizes)
         elif chols is not None:
-            self._cov_type = 'chol'
+            self._cov_type = "chol"
             if self.is_noisy:  # A strange case...
                 # Noise requires eigendecomposition
                 covs = [tt.dot(chol, chol.T) for chol in chols]
@@ -1466,10 +1535,11 @@ def _setup(self, covs, chols, evds, sigma):
                 self.chols = chols
                 self.chol_diags = list(map(tt.nlinalg.diag, self.chols))
                 self.sizes = tt.as_tensor_variable(
-                                [chol.shape[0] for chol in self.chols])
+                    [chol.shape[0] for chol in self.chols]
+                )
                 self.N = tt.prod(self.sizes)
         else:
-            self._cov_type = 'evd'
+            self._cov_type = "evd"
             self._setup_evd(evds)
 
     def _setup_evd(self, eigh_iterable):
@@ -1481,18 +1551,18 @@ def _setup_evd(self, eigh_iterable):
         self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep))
         self.eigs = kron_diag(*self.eigs_sep)  # Combine separate eigs
         if self.is_noisy:
-            self.eigs += self.sigma**2
+            self.eigs += self.sigma ** 2
         self.N = self.eigs.shape[0]
 
     def _setup_random(self):
-        if not hasattr(self, 'mv_params'):
-            self.mv_params = {'mu': self.mu}
-            if self._cov_type == 'cov':
+        if not hasattr(self, "mv_params"):
+            self.mv_params = {"mu": self.mu}
+            if self._cov_type == "cov":
                 cov = kronecker(*self.covs)
                 if self.is_noisy:
-                    cov = cov + self.sigma**2 * tt.identity_like(cov)
-                self.mv_params['cov'] = cov
-            elif self._cov_type == 'chol':
+                    cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+                self.mv_params["cov"] = cov
+            elif self._cov_type == "chol":
                 if self.is_noisy:
                     covs = []
                     for eig, Q in zip(self.eigs_sep, self.Qs):
@@ -1500,19 +1570,19 @@ def _setup_random(self):
                         covs.append(cov_i)
                     cov = kronecker(*covs)
                     if self.is_noisy:
-                        cov = cov + self.sigma**2 * tt.identity_like(cov)
-                    self.mv_params['chol'] = self.cholesky(cov)
+                        cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+                    self.mv_params["chol"] = self.cholesky(cov)
                 else:
-                    self.mv_params['chol'] = kronecker(*self.chols)
-            elif self._cov_type == 'evd':
+                    self.mv_params["chol"] = kronecker(*self.chols)
+            elif self._cov_type == "evd":
                 covs = []
                 for eig, Q in zip(self.eigs_sep, self.Qs):
                     cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T))
                     covs.append(cov_i)
                 cov = kronecker(*covs)
                 if self.is_noisy:
-                    cov = cov + self.sigma**2 * tt.identity_like(cov)
-                self.mv_params['cov'] = cov
+                    cov = cov + self.sigma ** 2 * tt.identity_like(cov)
+                self.mv_params["cov"] = cov
 
     def random(self, point=None, size=None):
         # Expand params into terms MvNormal can understand to force consistency
@@ -1523,7 +1593,7 @@ def random(self, point=None, size=None):
     def _quaddist(self, value):
         """Computes the quadratic (x-mu)^T @ K^-1 @ (x-mu) and log(det(K))"""
         if value.ndim > 2 or value.ndim == 0:
-            raise ValueError('Invalid dimension for value: %s' % value.ndim)
+            raise ValueError("Invalid dimension for value: %s" % value.ndim)
         if value.ndim == 1:
             onedim = True
             value = value[None, :]
@@ -1533,14 +1603,14 @@ def _quaddist(self, value):
         delta = value - self.mu
         if self._isEVD:
             sqrt_quad = kron_dot(self.QTs, delta.T)
-            sqrt_quad = sqrt_quad/tt.sqrt(self.eigs[:, None])
+            sqrt_quad = sqrt_quad / tt.sqrt(self.eigs[:, None])
             logdet = tt.sum(tt.log(self.eigs))
         else:
             sqrt_quad = kron_solve_lower(self.chols, delta.T)
             logdet = 0
             for chol_size, chol_diag in zip(self.sizes, self.chol_diags):
-                logchol = tt.log(chol_diag) * self.N/chol_size
-                logdet += tt.sum(2*logchol)
+                logchol = tt.log(chol_diag) * self.N / chol_size
+                logdet += tt.sum(2 * logchol)
         # Square each sample
         quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T)
         if onedim:
@@ -1549,4 +1619,4 @@ def _quaddist(self, value):
 
     def logp(self, value):
         quad, logdet = self._quaddist(value)
-        return - (quad + logdet + self.N*tt.log(2*np.pi)) / 2.0
+        return -(quad + logdet + self.N * tt.log(2 * np.pi)) / 2.0
diff --git a/pymc3/distributions/special.py b/pymc3/distributions/special.py
index 83d697ccf3..c53dbc5dbf 100644
--- a/pymc3/distributions/special.py
+++ b/pymc3/distributions/special.py
@@ -3,10 +3,10 @@
 from theano.scalar.basic_scipy import GammaLn, Psi
 from theano import scalar
 
-__all__ = ['gammaln', 'multigammaln', 'psi', 'log_i0']
+__all__ = ["gammaln", "multigammaln", "psi", "log_i0"]
 
-scalar_gammaln = GammaLn(scalar.upgrade_to_float, name='scalar_gammaln')
-gammaln = tt.Elemwise(scalar_gammaln, name='gammaln')
+scalar_gammaln = GammaLn(scalar.upgrade_to_float, name="scalar_gammaln")
+gammaln = tt.Elemwise(scalar_gammaln, name="gammaln")
 
 
 def multigammaln(a, p):
@@ -19,21 +19,35 @@ def multigammaln(a, p):
        degrees of freedom. p > 0
     """
     i = tt.arange(1, p + 1)
-    return (p * (p - 1) * tt.log(np.pi) / 4.
-            + tt.sum(gammaln(a + (1. - i) / 2.), axis=0))
+    return p * (p - 1) * tt.log(np.pi) / 4.0 + tt.sum(
+        gammaln(a + (1.0 - i) / 2.0), axis=0
+    )
 
 
 def log_i0(x):
     """
     Calculates the logarithm of the 0 order modified Bessel function of the first kind""
     """
-    return tt.switch(tt.lt(x, 5), tt.log1p(x**2. / 4. + x**4. / 64. + x**6. / 2304.
-                                           + x**8. / 147456. + x**10. / 14745600.
-                                           + x**12. / 2123366400.),
-                                  x - 0.5 * tt.log(2. * np.pi * x) + tt.log1p(1. / (8. * x)
-                                  + 9. / (128. * x**2.) + 225. / (3072. * x**3.)
-                                  + 11025. / (98304. * x**4.)))
-
-
-scalar_psi = Psi(scalar.upgrade_to_float, name='scalar_psi')
-psi = tt.Elemwise(scalar_psi, name='psi')
+    return tt.switch(
+        tt.lt(x, 5),
+        tt.log1p(
+            x ** 2.0 / 4.0
+            + x ** 4.0 / 64.0
+            + x ** 6.0 / 2304.0
+            + x ** 8.0 / 147456.0
+            + x ** 10.0 / 14745600.0
+            + x ** 12.0 / 2123366400.0
+        ),
+        x
+        - 0.5 * tt.log(2.0 * np.pi * x)
+        + tt.log1p(
+            1.0 / (8.0 * x)
+            + 9.0 / (128.0 * x ** 2.0)
+            + 225.0 / (3072.0 * x ** 3.0)
+            + 11025.0 / (98304.0 * x ** 4.0)
+        ),
+    )
+
+
+scalar_psi = Psi(scalar.upgrade_to_float, name="scalar_psi")
+psi = tt.Elemwise(scalar_psi, name="psi")
diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py
index 9c5847271a..afbb3c9b8c 100644
--- a/pymc3/distributions/timeseries.py
+++ b/pymc3/distributions/timeseries.py
@@ -8,13 +8,13 @@
 
 
 __all__ = [
-    'AR1',
-    'AR',
-    'GaussianRandomWalk',
-    'GARCH11',
-    'EulerMaruyama',
-    'MvGaussianRandomWalk',
-    'MvStudentTRandomWalk'
+    "AR1",
+    "AR",
+    "GaussianRandomWalk",
+    "GARCH11",
+    "EulerMaruyama",
+    "MvGaussianRandomWalk",
+    "MvStudentTRandomWalk",
 ]
 
 
@@ -35,7 +35,7 @@ def __init__(self, k, tau_e, *args, **kwargs):
         self.k = k = tt.as_tensor_variable(k)
         self.tau_e = tau_e = tt.as_tensor_variable(tau_e)
         self.tau = tau_e * (1 - k ** 2)
-        self.mode = tt.as_tensor_variable(0.)
+        self.mode = tt.as_tensor_variable(0.0)
 
     def logp(self, x):
         k = self.k
@@ -43,7 +43,7 @@ def logp(self, x):
 
         x_im1 = x[:-1]
         x_i = x[1:]
-        boundary = Normal.dist(0., tau=tau_e).logp
+        boundary = Normal.dist(0.0, tau=tau_e).logp
 
         innov_like = Normal.dist(k * x_im1, tau=tau_e).logp(x_i)
         return boundary(x[0]) + tt.sum(innov_like)
@@ -53,13 +53,14 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         k = dist.k
         tau_e = dist.tau_e
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{AR1}}(\mathit{{k}}={},~\mathit{{tau_e}}={})$'.format(name,
-                 get_variable_name(k), get_variable_name(tau_e))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{AR1}}(\mathit{{k}}={},~\mathit{{tau_e}}={})$".format(
+            name, get_variable_name(k), get_variable_name(tau_e)
+        )
 
 
 class AR(distribution.Continuous):
-    R"""
+    r"""
     Autoregressive process with p lags.
 
     .. math::
@@ -89,16 +90,16 @@ class AR(distribution.Continuous):
         distribution for initial values (Defaults to Flat())
     """
 
-    def __init__(self, rho, sd=None, tau=None,
-                 constant=False, init=Flat.dist(),
-                 *args, **kwargs):
+    def __init__(
+        self, rho, sd=None, tau=None, constant=False, init=Flat.dist(), *args, **kwargs
+    ):
 
         super(AR, self).__init__(*args, **kwargs)
         tau, sd = get_tau_sd(tau=tau, sd=sd)
         self.sd = tt.as_tensor_variable(sd)
         self.tau = tt.as_tensor_variable(tau)
 
-        self.mean = tt.as_tensor_variable(0.)
+        self.mean = tt.as_tensor_variable(0.0)
 
         if isinstance(rho, list):
             p = len(rho)
@@ -124,23 +125,33 @@ def __init__(self, rho, sd=None, tau=None,
 
     def logp(self, value):
         if self.constant:
-            x = tt.add(*[self.rho[i + 1] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)])
-            eps = value[self.p:] - self.rho[0] - x
+            x = tt.add(
+                *[
+                    self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)]
+                    for i in range(self.p)
+                ]
+            )
+            eps = value[self.p :] - self.rho[0] - x
         else:
             if self.p == 1:
                 x = self.rho * value[:-1]
             else:
-                x = tt.add(*[self.rho[i] * value[self.p - (i + 1):-(i + 1)] for i in range(self.p)])
-            eps = value[self.p:] - x
+                x = tt.add(
+                    *[
+                        self.rho[i] * value[self.p - (i + 1) : -(i + 1)]
+                        for i in range(self.p)
+                    ]
+                )
+            eps = value[self.p :] - x
 
         innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps)
-        init_like = self.init.logp(value[:self.p])
+        init_like = self.init.logp(value[: self.p])
 
         return tt.sum(innov_like) + tt.sum(init_like)
 
 
 class GaussianRandomWalk(distribution.Continuous):
-    R"""
+    r"""
     Random Walk with Normal innovations
 
     Parameters
@@ -155,15 +166,14 @@ class GaussianRandomWalk(distribution.Continuous):
         distribution for initial value (Defaults to Flat())
     """
 
-    def __init__(self, tau=None, init=Flat.dist(), sd=None, mu=0.,
-                 *args, **kwargs):
+    def __init__(self, tau=None, init=Flat.dist(), sd=None, mu=0.0, *args, **kwargs):
         super(GaussianRandomWalk, self).__init__(*args, **kwargs)
         tau, sd = get_tau_sd(tau=tau, sd=sd)
         self.tau = tau = tt.as_tensor_variable(tau)
         self.sd = sd = tt.as_tensor_variable(sd)
         self.mu = mu = tt.as_tensor_variable(mu)
         self.init = init
-        self.mean = tt.as_tensor_variable(0.)
+        self.mean = tt.as_tensor_variable(0.0)
 
     def logp(self, x):
         tau = self.tau
@@ -182,14 +192,14 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         mu = dist.mu
         sd = dist.sd
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{{GaussianRandomWalk}}(\mathit{{mu}}={},~\mathit{{sd}}={})$'.format(name,
-                                                get_variable_name(mu),
-                                                get_variable_name(sd))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{{GaussianRandomWalk}}(\mathit{{mu}}={},~\mathit{{sd}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(sd)
+        )
 
 
 class GARCH11(distribution.Continuous):
-    R"""
+    r"""
     GARCH(1,1) with Normal innovations. The model is specified by
 
     .. math::
@@ -212,15 +222,14 @@ class GARCH11(distribution.Continuous):
         initial_vol >= 0, initial volatility, sigma_0
     """
 
-    def __init__(self, omega, alpha_1, beta_1,
-                 initial_vol, *args, **kwargs):
+    def __init__(self, omega, alpha_1, beta_1, initial_vol, *args, **kwargs):
         super(GARCH11, self).__init__(*args, **kwargs)
 
         self.omega = omega = tt.as_tensor_variable(omega)
         self.alpha_1 = alpha_1 = tt.as_tensor_variable(alpha_1)
         self.beta_1 = beta_1 = tt.as_tensor_variable(beta_1)
         self.initial_vol = tt.as_tensor_variable(initial_vol)
-        self.mean = tt.as_tensor_variable(0.)
+        self.mean = tt.as_tensor_variable(0.0)
 
     def get_volatility(self, x):
         x = x[:-1]
@@ -228,16 +237,17 @@ def get_volatility(self, x):
         def volatility_update(x, vol, w, a, b):
             return tt.sqrt(w + a * tt.square(x) + b * tt.square(vol))
 
-        vol, _ = scan(fn=volatility_update,
-                      sequences=[x],
-                      outputs_info=[self.initial_vol],
-                      non_sequences=[self.omega, self.alpha_1,
-                                     self.beta_1])
+        vol, _ = scan(
+            fn=volatility_update,
+            sequences=[x],
+            outputs_info=[self.initial_vol],
+            non_sequences=[self.omega, self.alpha_1, self.beta_1],
+        )
         return tt.concatenate([[self.initial_vol], vol])
 
     def logp(self, x):
         vol = self.get_volatility(x)
-        return tt.sum(Normal.dist(0., sd=vol).logp(x))
+        return tt.sum(Normal.dist(0.0, sd=vol).logp(x))
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -245,16 +255,17 @@ def _repr_latex_(self, name=None, dist=None):
         omega = dist.omega
         alpha_1 = dist.alpha_1
         beta_1 = dist.beta_1
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{GARCH}(1,~1,~\mathit{{omega}}={},~\mathit{{alpha_1}}={},~\mathit{{beta_1}}={})$'.format(
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{GARCH}(1,~1,~\mathit{{omega}}={},~\mathit{{alpha_1}}={},~\mathit{{beta_1}}={})$".format(
             name,
             get_variable_name(omega),
             get_variable_name(alpha_1),
-            get_variable_name(beta_1))
+            get_variable_name(beta_1),
+        )
 
 
 class EulerMaruyama(distribution.Continuous):
-    R"""
+    r"""
     Stochastic differential equation discretized with the Euler-Maruyama method.
 
     Parameters
@@ -266,6 +277,7 @@ class EulerMaruyama(distribution.Continuous):
     sde_pars : tuple
         parameters of the SDE, passed as *args to sde_fn
     """
+
     def __init__(self, dt, sde_fn, sde_pars, *args, **kwds):
         super(EulerMaruyama, self).__init__(*args, **kwds)
         self.dt = dt = tt.as_tensor_variable(dt)
@@ -283,14 +295,14 @@ def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         dt = dist.dt
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{EulerMaruyama}(\mathit{{dt}}={})$'.format(name,
-                                                get_variable_name(dt))
-
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{EulerMaruyama}(\mathit{{dt}}={})$".format(
+            name, get_variable_name(dt)
+        )
 
 
 class MvGaussianRandomWalk(distribution.Continuous):
-    R"""
+    r"""
     Multivariate Random Walk with Normal innovations
 
     Parameters
@@ -311,14 +323,24 @@ class MvGaussianRandomWalk(distribution.Continuous):
     Only one of cov, tau or chol is required.
 
     """
-    def __init__(self, mu=0., cov=None, tau=None, chol=None, lower=True, init=Flat.dist(),
-                 *args, **kwargs):
+
+    def __init__(
+        self,
+        mu=0.0,
+        cov=None,
+        tau=None,
+        chol=None,
+        lower=True,
+        init=Flat.dist(),
+        *args,
+        **kwargs
+    ):
         super(MvGaussianRandomWalk, self).__init__(*args, **kwargs)
 
         self.init = init
         self.innovArgs = (mu, cov, tau, chol, lower)
         self.innov = multivariate.MvNormal.dist(*self.innovArgs)
-        self.mean = tt.as_tensor_variable(0.)
+        self.mean = tt.as_tensor_variable(0.0)
 
     def logp(self, x):
         x_im1 = x[:-1]
@@ -331,14 +353,14 @@ def _repr_latex_(self, name=None, dist=None):
             dist = self
         mu = dist.innov.mu
         cov = dist.innov.cov
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{MvGaussianRandomWalk}(\mathit{{mu}}={},~\mathit{{cov}}={})$'.format(name,
-                                                get_variable_name(mu),
-                                                get_variable_name(cov))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{MvGaussianRandomWalk}(\mathit{{mu}}={},~\mathit{{cov}}={})$".format(
+            name, get_variable_name(mu), get_variable_name(cov)
+        )
 
 
 class MvStudentTRandomWalk(MvGaussianRandomWalk):
-    R"""
+    r"""
     Multivariate Random Walk with StudentT innovations
 
     Parameters
@@ -355,6 +377,7 @@ class MvStudentTRandomWalk(MvGaussianRandomWalk):
     init : distribution
         distribution for initial value (Defaults to Flat())
     """
+
     def __init__(self, nu, *args, **kwargs):
         super(MvStudentTRandomWalk, self).__init__(*args, **kwargs)
         self.nu = tt.as_tensor_variable(nu)
@@ -366,8 +389,7 @@ def _repr_latex_(self, name=None, dist=None):
         nu = dist.innov.nu
         mu = dist.innov.mu
         cov = dist.innov.cov
-        name = r'\text{%s}' % name
-        return r'${} \sim \text{MvStudentTRandomWalk}(\mathit{{nu}}={},~\mathit{{mu}}={},~\mathit{{cov}}={})$'.format(name,
-                                                get_variable_name(nu),
-                                                get_variable_name(mu),
-                                                get_variable_name(cov))
+        name = r"\text{%s}" % name
+        return r"${} \sim \text{MvStudentTRandomWalk}(\mathit{{nu}}={},~\mathit{{mu}}={},~\mathit{{cov}}={})$".format(
+            name, get_variable_name(nu), get_variable_name(mu), get_variable_name(cov)
+        )
diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py
index 682b3426d7..d89f4ea65a 100644
--- a/pymc3/distributions/transforms.py
+++ b/pymc3/distributions/transforms.py
@@ -9,9 +9,19 @@
 import numpy as np
 from scipy.special import logit as nplogit
 
-__all__ = ['transform', 'stick_breaking', 'logodds', 'interval', 'log_exp_m1',
-           'lowerbound', 'upperbound', 'ordered', 'log', 'sum_to_1',
-           't_stick_breaking']
+__all__ = [
+    "transform",
+    "stick_breaking",
+    "logodds",
+    "interval",
+    "log_exp_m1",
+    "lowerbound",
+    "upperbound",
+    "ordered",
+    "log",
+    "sum_to_1",
+    "t_stick_breaking",
+]
 
 
 class Transform(object):
@@ -21,6 +31,7 @@ class Transform(object):
     ----------
     name : str
     """
+
     name = ""
 
     def forward(self, x):
@@ -99,7 +110,6 @@ def __str__(self):
 
 
 class ElemwiseTransform(Transform):
-
     def jacobian_det(self, x):
         grad = tt.reshape(gradient(tt.sum(self.backward(x)), [x]), x.shape)
         return tt.log(tt.abs_(grad))
@@ -122,14 +132,14 @@ def __init__(self, dist, transform, *args, **kwargs):
 
         self.dist = dist
         self.transform_used = transform
-        v = forward(FreeRV(name='v', distribution=dist))
+        v = forward(FreeRV(name="v", distribution=dist))
         self.type = v.type
 
         super(TransformedDistribution, self).__init__(
-            v.shape.tag.test_value, v.dtype,
-            testval, dist.defaults, *args, **kwargs)
+            v.shape.tag.test_value, v.dtype, testval, dist.defaults, *args, **kwargs
+        )
 
-        if transform.name == 'stickbreaking':
+        if transform.name == "stickbreaking":
             b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False))
             # force the last dim not broadcastable
             self.type = tt.TensorType(v.dtype, b)
@@ -144,6 +154,7 @@ def logp(self, x):
     def logp_nojac(self, x):
         return self.dist.logp(self.transform_used.backward(x))
 
+
 transform = Transform
 
 
@@ -162,6 +173,7 @@ def forward_val(self, x, point=None):
     def jacobian_det(self, x):
         return x
 
+
 log = Log()
 
 
@@ -176,14 +188,15 @@ def forward(self, x):
         y = Log(Exp(x) - 1)
           = Log(1 - Exp(-x)) + x
         """
-        return tt.log(1.-tt.exp(-x)) + x
+        return tt.log(1.0 - tt.exp(-x)) + x
 
     def forward_val(self, x, point=None):
-        return np.log(1.-np.exp(-x)) + x
+        return np.log(1.0 - np.exp(-x)) + x
 
     def jacobian_det(self, x):
         return -tt.nnet.softplus(-x)
 
+
 log_exp_m1 = LogExpM1()
 
 
@@ -199,6 +212,7 @@ def forward(self, x):
     def forward_val(self, x, point=None):
         return nplogit(x)
 
+
 logodds = LogOdds()
 
 
@@ -224,14 +238,14 @@ def forward_val(self, x, point=None):
         # 2017-06-19
         # the `self.a-0.` below is important for the testval to propagates
         # For an explanation see pull/2328#issuecomment-309303811
-        a, b = draw_values([self.a-0., self.b-0.],
-                            point=point)
+        a, b = draw_values([self.a - 0.0, self.b - 0.0], point=point)
         return floatX(np.log(x - a) - np.log(b - x))
 
     def jacobian_det(self, x):
         s = tt.nnet.softplus(-x)
         return tt.log(self.b - self.a) - 2 * s - x
 
+
 interval = Interval
 
 
@@ -256,13 +270,13 @@ def forward_val(self, x, point=None):
         # 2017-06-19
         # the `self.a-0.` below is important for the testval to propagates
         # For an explanation see pull/2328#issuecomment-309303811
-        a = draw_values([self.a-0.],
-                        point=point)[0]
+        a = draw_values([self.a - 0.0], point=point)[0]
         return floatX(np.log(x - a))
 
     def jacobian_det(self, x):
         return x
 
+
 lowerbound = LowerBound
 
 
@@ -287,13 +301,13 @@ def forward_val(self, x, point=None):
         # 2017-06-19
         # the `self.b-0.` below is important for the testval to propagates
         # For an explanation see pull/2328#issuecomment-309303811
-        b = draw_values([self.b-0.],
-                        point=point)[0]
+        b = draw_values([self.b - 0.0], point=point)[0]
         return floatX(np.log(b - x))
 
     def jacobian_det(self, x):
         return x
 
+
 upperbound = UpperBound
 
 
@@ -321,6 +335,7 @@ def forward_val(self, x, point=None):
     def jacobian_det(self, y):
         return tt.sum(y[..., 1:], axis=-1)
 
+
 ordered = Ordered()
 
 
@@ -329,6 +344,7 @@ class SumTo1(Transform):
     Transforms K dimensional simplex space (values in [0,1] and sum to 1) to K - 1 vector of values in [0,1]
     This Transformation operates on the last dimension of the input tensor.
     """
+
     name = "sumto1"
 
     def backward(self, y):
@@ -345,6 +361,7 @@ def jacobian_det(self, x):
         y = tt.zeros(x.shape)
         return tt.sum(y, axis=-1)
 
+
 sum_to_1 = SumTo1()
 
 
@@ -371,8 +388,8 @@ def forward(self, x_):
         s = tt.extra_ops.cumsum(x0[::-1], 0)[::-1] + x[-1]
         z = x0 / s
         Km1 = x.shape[0] - 1
-        k = tt.arange(Km1)[(slice(None), ) + (None, ) * (x.ndim - 1)]
-        eq_share = logit(1. / (Km1 + 1 - k).astype(str(x_.dtype)))
+        k = tt.arange(Km1)[(slice(None),) + (None,) * (x.ndim - 1)]
+        eq_share = logit(1.0 / (Km1 + 1 - k).astype(str(x_.dtype)))
         y = logit(z) - eq_share
         return floatX(y.T)
 
@@ -384,15 +401,15 @@ def forward_val(self, x_, point=None):
         z = x0 / s
         Km1 = x.shape[0] - 1
         k = np.arange(Km1)[(slice(None),) + (None,) * (x.ndim - 1)]
-        eq_share = nplogit(1. / (Km1 + 1 - k).astype(str(x_.dtype)))
+        eq_share = nplogit(1.0 / (Km1 + 1 - k).astype(str(x_.dtype)))
         y = nplogit(z) - eq_share
         return floatX(y.T)
 
     def backward(self, y_):
         y = y_.T
         Km1 = y.shape[0]
-        k = tt.arange(Km1)[(slice(None), ) + (None, ) * (y.ndim - 1)]
-        eq_share = logit(1. / (Km1 + 1 - k).astype(str(y_.dtype)))
+        k = tt.arange(Km1)[(slice(None),) + (None,) * (y.ndim - 1)]
+        eq_share = logit(1.0 / (Km1 + 1 - k).astype(str(y_.dtype)))
         z = invlogit(y + eq_share, self.eps)
         yl = tt.concatenate([z, tt.ones(y[:1].shape)])
         yu = tt.concatenate([tt.ones(y[:1].shape), 1 - z])
@@ -403,12 +420,15 @@ def backward(self, y_):
     def jacobian_det(self, y_):
         y = y_.T
         Km1 = y.shape[0]
-        k = tt.arange(Km1)[(slice(None), ) + (None, ) * (y.ndim - 1)]
-        eq_share = logit(1. / (Km1 + 1 - k).astype(str(y_.dtype)))
+        k = tt.arange(Km1)[(slice(None),) + (None,) * (y.ndim - 1)]
+        eq_share = logit(1.0 / (Km1 + 1 - k).astype(str(y_.dtype)))
         yl = y + eq_share
         yu = tt.concatenate([tt.ones(y[:1].shape), 1 - invlogit(yl, self.eps)])
         S = tt.extra_ops.cumprod(yu, 0)
-        return tt.sum(tt.log(S[:-1]) - tt.log1p(tt.exp(yl)) - tt.log1p(tt.exp(-yl)), 0).T
+        return tt.sum(
+            tt.log(S[:-1]) - tt.log1p(tt.exp(yl)) - tt.log1p(tt.exp(-yl)), 0
+        ).T
+
 
 stick_breaking = StickBreaking()
 
@@ -418,6 +438,7 @@ def jacobian_det(self, y_):
 class Circular(ElemwiseTransform):
     """Transforms a linear space into a circular one.
     """
+
     name = "circular"
 
     def backward(self, y):
@@ -432,6 +453,7 @@ def forward_val(self, x, point=None):
     def jacobian_det(self, x):
         return tt.zeros(x.shape)
 
+
 circular = Circular()
 
 
@@ -458,7 +480,7 @@ def jacobian_det(self, y):
 class Chain(Transform):
     def __init__(self, transform_list):
         self.transform_list = transform_list
-        self.name = '+'.join([transf.name for transf in self.transform_list])
+        self.name = "+".join([transf.name for transf in self.transform_list])
 
     def forward(self, x):
         y = x
@@ -488,7 +510,7 @@ def jacobian_det(self, y):
             y = transf.backward(y)
             ndim0 = min(ndim0, det_.ndim)
         # match the shape of the smallest jacobian_det
-        det = 0.
+        det = 0.0
         for det_ in det_list:
             if det_.ndim > ndim0:
                 det += det_.sum(axis=-1)
diff --git a/pymc3/examples/GHME_2013.py b/pymc3/examples/GHME_2013.py
index bb1e57c7e0..eb2f4439de 100644
--- a/pymc3/examples/GHME_2013.py
+++ b/pymc3/examples/GHME_2013.py
@@ -5,11 +5,11 @@
 from pymc3 import HalfCauchy, Model, Normal, get_data, sample
 from pymc3.distributions.timeseries import GaussianRandomWalk
 
-data = pd.read_csv(get_data('pancreatitis.csv'))
-countries = ['CYP', 'DNK', 'ESP', 'FIN', 'GBR', 'ISL']
+data = pd.read_csv(get_data("pancreatitis.csv"))
+countries = ["CYP", "DNK", "ESP", "FIN", "GBR", "ISL"]
 data = data[data.area.isin(countries)]
 
-age = data['age'] = np.array(data.age_start + data.age_end) / 2
+age = data["age"] = np.array(data.age_start + data.age_end) / 2
 rate = data.value = data.value * 1000
 group, countries = pd.factorize(data.area, order=countries)
 
@@ -20,7 +20,7 @@
     plt.subplot(2, 3, i + 1)
     plt.title(country)
     d = data[data.area == country]
-    plt.plot(d.age, d.value, '.')
+    plt.plot(d.age, d.value, ".")
 
     plt.ylim(0, rate.max())
 
@@ -43,33 +43,33 @@ def interpolate(x0, y0, x, group):
 
 
 with Model() as model:
-    coeff_sd = HalfCauchy('coeff_sd', 5)
+    coeff_sd = HalfCauchy("coeff_sd", 5)
 
-    y = GaussianRandomWalk('y', sd=coeff_sd, shape=(nknots, ncountries))
+    y = GaussianRandomWalk("y", sd=coeff_sd, shape=(nknots, ncountries))
 
     p = interpolate(knots, y, age, group)
 
-    sd = HalfCauchy('sd', 5)
+    sd = HalfCauchy("sd", 5)
 
-    vals = Normal('vals', p, sd=sd, observed=rate)
+    vals = Normal("vals", p, sd=sd, observed=rate)
 
 
 def run(n=3000):
     if n == "short":
         n = 150
     with model:
-        trace = sample(n, tune=int(n/2), init='advi+adapt_diag')
+        trace = sample(n, tune=int(n / 2), init="advi+adapt_diag")
 
     for i, country in enumerate(countries):
         plt.subplot(2, 3, i + 1)
         plt.title(country)
 
         d = data[data.area == country]
-        plt.plot(d.age, d.value, '.')
-        plt.plot(knots, trace[y][::5, :, i].T, color='r', alpha=.01)
+        plt.plot(d.age, d.value, ".")
+        plt.plot(knots, trace[y][::5, :, i].T, color="r", alpha=0.01)
 
         plt.ylim(0, rate.max())
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/LKJ_correlation.py b/pymc3/examples/LKJ_correlation.py
index b2dc0bb1ff..9080a17e6e 100644
--- a/pymc3/examples/LKJ_correlation.py
+++ b/pymc3/examples/LKJ_correlation.py
@@ -14,32 +14,36 @@
 stds = np.ones(4) / 2.0
 
 # Correlation matrix of 4 variables:
-corr_r = np.array([[1.,  0.75,  0.,  0.15],
-                   [0.75,  1., -0.06,  0.19],
-                   [0., -0.06,  1., -0.04],
-                   [0.15,  0.19, -0.04,  1.]])
+corr_r = np.array(
+    [
+        [1.0, 0.75, 0.0, 0.15],
+        [0.75, 1.0, -0.06, 0.19],
+        [0.0, -0.06, 1.0, -0.04],
+        [0.15, 0.19, -0.04, 1.0],
+    ]
+)
 cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))
 
 dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)
 
 with pm.Model() as model:
 
-    mu = pm.Normal('mu', mu=0, sd=1, shape=n_var)
+    mu = pm.Normal("mu", mu=0, sd=1, shape=n_var)
 
     # Note that we access the distribution for the standard
     # deviations, and do not create a new random variable.
     sd_dist = pm.HalfCauchy.dist(beta=2.5)
-    packed_chol = pm.LKJCholeskyCov('chol_cov', n=n_var, eta=1, sd_dist=sd_dist)
+    packed_chol = pm.LKJCholeskyCov("chol_cov", n=n_var, eta=1, sd_dist=sd_dist)
     # compute the covariance matrix
     chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
     cov = tt.dot(chol, chol.T)
 
     # Extract the standard deviations etc
-    sd = pm.Deterministic('sd', tt.sqrt(tt.diag(cov)))
-    corr = tt.diag(sd**-1).dot(cov.dot(tt.diag(sd**-1)))
-    r = pm.Deterministic('r', corr[np.triu_indices(n_var, k=1)])
+    sd = pm.Deterministic("sd", tt.sqrt(tt.diag(cov)))
+    corr = tt.diag(sd ** -1).dot(cov.dot(tt.diag(sd ** -1)))
+    r = pm.Deterministic("r", corr[np.triu_indices(n_var, k=1)])
 
-    like = pm.MvNormal('likelihood', mu=mu, chol=chol, observed=dataset)
+    like = pm.MvNormal("likelihood", mu=mu, chol=chol, observed=dataset)
 
 
 def run(n=1000):
@@ -47,8 +51,12 @@ def run(n=1000):
         n = 50
     with model:
         trace = pm.sample(n)
-    pm.traceplot(trace, varnames=['mu', 'r'],
-                 lines={'mu': mu_r, 'r': corr_r[np.triu_indices(n_var, k=1)]})
+    pm.traceplot(
+        trace,
+        varnames=["mu", "r"],
+        lines={"mu": mu_r, "r": corr_r[np.triu_indices(n_var, k=1)]},
+    )
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/arbitrary_stochastic.py b/pymc3/examples/arbitrary_stochastic.py
index 2d6e4fe1f2..1b747e3b7d 100644
--- a/pymc3/examples/arbitrary_stochastic.py
+++ b/pymc3/examples/arbitrary_stochastic.py
@@ -10,15 +10,15 @@ def logp(failure, lam, value):
 
 def build_model():
     # data
-    failure = np.array([0., 1.])
-    value = np.array([1., 0.])
+    failure = np.array([0.0, 1.0])
+    value = np.array([1.0, 0.0])
 
     # model
     with pm.Model() as model:
-        lam = pm.Exponential('lam', 1.)
-        pm.DensityDist('x', logp, observed={'failure': failure,
-                                            'lam': lam,
-                                            'value': value})
+        lam = pm.Exponential("lam", 1.0)
+        pm.DensityDist(
+            "x", logp, observed={"failure": failure, "lam": lam, "value": value}
+        )
     return model
 
 
@@ -28,5 +28,6 @@ def run(n_samples=3000):
         trace = pm.sample(n_samples)
     return trace
 
+
 if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/arma_example.py b/pymc3/examples/arma_example.py
index 55889bbf97..2f8300c3b8 100644
--- a/pymc3/examples/arma_example.py
+++ b/pymc3/examples/arma_example.py
@@ -2,6 +2,7 @@
 from theano import scan, shared
 
 import numpy as np
+
 """
 ARMA example
 It is interesting to note just how much more compact this is than the original STAN example
@@ -53,10 +54,10 @@
 def build_model():
     y = shared(np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float32))
     with pm.Model() as arma_model:
-        sigma = pm.HalfNormal('sigma', 5.)
-        theta = pm.Normal('theta', 0., sd=1.)
-        phi = pm.Normal('phi', 0., sd=2.)
-        mu = pm.Normal('mu', 0., sd=10.)
+        sigma = pm.HalfNormal("sigma", 5.0)
+        theta = pm.Normal("theta", 0.0, sd=1.0)
+        phi = pm.Normal("phi", 0.0, sd=2.0)
+        mu = pm.Normal("mu", 0.0, sd=10.0)
 
         err0 = y[0] - (mu + phi * mu)
 
@@ -64,25 +65,27 @@ def calc_next(last_y, this_y, err, mu, phi, theta):
             nu_t = mu + phi * last_y + theta * err
             return this_y - nu_t
 
-        err, _ = scan(fn=calc_next,
-                      sequences=dict(input=y, taps=[-1, 0]),
-                      outputs_info=[err0],
-                      non_sequences=[mu, phi, theta])
+        err, _ = scan(
+            fn=calc_next,
+            sequences=dict(input=y, taps=[-1, 0]),
+            outputs_info=[err0],
+            non_sequences=[mu, phi, theta],
+        )
 
-        pm.Potential('like', pm.Normal.dist(0, sd=sigma).logp(err))
+        pm.Potential("like", pm.Normal.dist(0, sd=sigma).logp(err))
     return arma_model
 
 
 def run(n_samples=1000):
     model = build_model()
     with model:
-        trace = pm.sample(draws=n_samples,
-                          tune=1000,
-                          nuts_kwargs=dict(target_accept=.99))
+        trace = pm.sample(
+            draws=n_samples, tune=1000, nuts_kwargs=dict(target_accept=0.99)
+        )
 
     pm.plots.traceplot(trace)
     pm.plots.forestplot(trace)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/baseball.py b/pymc3/examples/baseball.py
index 89eefa9ce8..03c159851e 100644
--- a/pymc3/examples/baseball.py
+++ b/pymc3/examples/baseball.py
@@ -6,31 +6,38 @@
 import pymc3 as pm
 import numpy as np
 
+
 def build_model():
-    data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t", 
-                      skiprows=1, usecols=(2,3))
-    
-    atbats = pm.floatX(data[:,0])
-    hits = pm.floatX(data[:,1])
-    
+    data = np.loadtxt(
+        pm.get_data("efron-morris-75-data.tsv"),
+        delimiter="\t",
+        skiprows=1,
+        usecols=(2, 3),
+    )
+
+    atbats = pm.floatX(data[:, 0])
+    hits = pm.floatX(data[:, 1])
+
     N = len(hits)
-    
+
     # we want to bound the kappa below
     BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)
-    
+
     with pm.Model() as model:
-        phi = pm.Uniform('phi', lower=0.0, upper=1.0)
-        kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5)
-        thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N)
-        ys = pm.Binomial('ys', n=atbats, p=thetas, observed=hits)
+        phi = pm.Uniform("phi", lower=0.0, upper=1.0)
+        kappa = BoundedKappa("kappa", alpha=1.0001, m=1.5)
+        thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
+        ys = pm.Binomial("ys", n=atbats, p=thetas, observed=hits)
     return model
 
+
 def run(n=2000):
     model = build_model()
     with model:
-        trace = pm.sample(n, nuts_kwargs={'target_accept':.99})
+        trace = pm.sample(n, nuts_kwargs={"target_accept": 0.99})
 
     pm.traceplot(trace)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/custom_dists.py b/pymc3/examples/custom_dists.py
index a535fd4fff..4aaa4080fb 100644
--- a/pymc3/examples/custom_dists.py
+++ b/pymc3/examples/custom_dists.py
@@ -22,23 +22,25 @@
 # add scatter to points
 xdata = np.random.normal(xdata, 10)
 ydata = np.random.normal(ydata, 10)
-data = {'x': xdata, 'y': ydata}
+data = {"x": xdata, "y": ydata}
 
 # define loglikelihood outside of the model context, otherwise cores wont work:
 # Lambdas defined in local namespace are not picklable (see issue #1995)
 def loglike1(value):
-    return -1.5 * tt.log(1 + value**2)
+    return -1.5 * tt.log(1 + value ** 2)
+
+
 def loglike2(value):
     return -tt.log(tt.abs_(value))
 
+
 with pm.Model() as model:
-    alpha = pm.Normal('intercept', mu=0, sd=100)
+    alpha = pm.Normal("intercept", mu=0, sd=100)
     # Create custom densities
-    beta = pm.DensityDist('slope', loglike1, testval=0)
-    sigma = pm.DensityDist('sigma', loglike2, testval=1)
+    beta = pm.DensityDist("slope", loglike1, testval=0)
+    sigma = pm.DensityDist("sigma", loglike2, testval=1)
     # Create likelihood
-    like = pm.Normal('y_est', mu=alpha + beta *
-                        xdata, sd=sigma, observed=ydata)
+    like = pm.Normal("y_est", mu=alpha + beta * xdata, sd=sigma, observed=ydata)
 
     trace = pm.sample(2000, cores=2)
 
@@ -47,10 +49,11 @@ def loglike2(value):
 # Create some convenience routines for plotting
 # All functions below written by Jake Vanderplas
 
+
 def compute_sigma_level(trace1, trace2, nbins=20):
     """From a set of traces, bin by number of standard deviations"""
     L, xbins, ybins = np.histogram2d(trace1, trace2, nbins)
-    L[L == 0] = 1E-16
+    L[L == 0] = 1e-16
 
     shape = L.shape
     L = L.ravel()
@@ -73,14 +76,14 @@ def plot_MCMC_trace(ax, xdata, ydata, trace, scatter=False, **kwargs):
     xbins, ybins, sigma = compute_sigma_level(trace[0], trace[1])
     ax.contour(xbins, ybins, sigma.T, levels=[0.683, 0.955], **kwargs)
     if scatter:
-        ax.plot(trace[0], trace[1], ',k', alpha=0.1)
-    ax.set_xlabel(r'$\alpha$')
-    ax.set_ylabel(r'$\beta$')
+        ax.plot(trace[0], trace[1], ",k", alpha=0.1)
+    ax.set_xlabel(r"$\alpha$")
+    ax.set_ylabel(r"$\beta$")
 
 
 def plot_MCMC_model(ax, xdata, ydata, trace):
     """Plot the linear model and 2sigma contours"""
-    ax.plot(xdata, ydata, 'ok')
+    ax.plot(xdata, ydata, "ok")
 
     alpha, beta = trace[:2]
     xfit = np.linspace(-20, 120, 10)
@@ -88,22 +91,21 @@ def plot_MCMC_model(ax, xdata, ydata, trace):
     mu = yfit.mean(0)
     sig = 2 * yfit.std(0)
 
-    ax.plot(xfit, mu, '-k')
-    ax.fill_between(xfit, mu - sig, mu + sig, color='lightgray')
+    ax.plot(xfit, mu, "-k")
+    ax.fill_between(xfit, mu - sig, mu + sig, color="lightgray")
 
-    ax.set_xlabel('x')
-    ax.set_ylabel('y')
+    ax.set_xlabel("x")
+    ax.set_ylabel("y")
 
 
-def plot_MCMC_results(xdata, ydata, trace, colors='k'):
+def plot_MCMC_results(xdata, ydata, trace, colors="k"):
     """Plot both the trace and the model together"""
     _, ax = plt.subplots(1, 2, figsize=(10, 4))
     plot_MCMC_trace(ax[0], xdata, ydata, trace, True, colors=colors)
     plot_MCMC_model(ax[1], xdata, ydata, trace)
 
-pymc3_trace = [trace['intercept'],
-               trace['slope'],
-               trace['sigma']]
+
+pymc3_trace = [trace["intercept"], trace["slope"], trace["sigma"]]
 
 plot_MCMC_results(xdata, ydata, pymc3_trace)
 plt.show()
diff --git a/pymc3/examples/disaster_model.py b/pymc3/examples/disaster_model.py
index 45c9ec0120..81347dd039 100644
--- a/pymc3/examples/disaster_model.py
+++ b/pymc3/examples/disaster_model.py
@@ -14,34 +14,148 @@
 from numpy import arange, array
 
 
-__all__ = ['disasters_data', 'switchpoint', 'early_mean', 'late_mean', 'rate',
-           'disasters']
+__all__ = [
+    "disasters_data",
+    "switchpoint",
+    "early_mean",
+    "late_mean",
+    "rate",
+    "disasters",
+]
 
 
 # Time series of recorded coal mining disasters in the UK from 1851 to 1962
-disasters_data = array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
-                        3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
-                        2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
-                        1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
-                        0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
-                        3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
-                        0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
+disasters_data = array(
+    [
+        4,
+        5,
+        4,
+        0,
+        1,
+        4,
+        3,
+        4,
+        0,
+        6,
+        3,
+        3,
+        4,
+        0,
+        2,
+        6,
+        3,
+        3,
+        5,
+        4,
+        5,
+        3,
+        1,
+        4,
+        4,
+        1,
+        5,
+        5,
+        3,
+        4,
+        2,
+        5,
+        2,
+        2,
+        3,
+        4,
+        2,
+        1,
+        3,
+        2,
+        2,
+        1,
+        1,
+        1,
+        1,
+        3,
+        0,
+        0,
+        1,
+        0,
+        1,
+        1,
+        0,
+        0,
+        3,
+        1,
+        0,
+        3,
+        2,
+        2,
+        0,
+        1,
+        1,
+        1,
+        0,
+        1,
+        0,
+        1,
+        0,
+        0,
+        0,
+        2,
+        1,
+        0,
+        0,
+        0,
+        1,
+        1,
+        0,
+        2,
+        3,
+        3,
+        1,
+        1,
+        2,
+        1,
+        1,
+        1,
+        1,
+        2,
+        4,
+        2,
+        0,
+        0,
+        1,
+        4,
+        0,
+        0,
+        0,
+        1,
+        0,
+        0,
+        0,
+        0,
+        0,
+        1,
+        0,
+        0,
+        1,
+        0,
+        1,
+    ]
+)
 year = arange(1851, 1962)
 
 with pm.Model() as model:
 
-    switchpoint = pm.DiscreteUniform('switchpoint', lower=year.min(), upper=year.max())
-    early_mean = pm.Exponential('early_mean', lam=1.)
-    late_mean = pm.Exponential('late_mean', lam=1.)
+    switchpoint = pm.DiscreteUniform("switchpoint", lower=year.min(), upper=year.max())
+    early_mean = pm.Exponential("early_mean", lam=1.0)
+    late_mean = pm.Exponential("late_mean", lam=1.0)
 
     # Allocate appropriate Poisson rates to years before and after current
     # switchpoint location
     rate = tt.switch(switchpoint >= year, early_mean, late_mean)
-    
-    disasters = pm.Poisson('disasters', rate, observed=disasters_data)
+
+    disasters = pm.Poisson("disasters", rate, observed=disasters_data)
 
     # Initial values for stochastic nodes
-    start = {'early_mean': 2., 'late_mean': 3.}
-    
+    start = {"early_mean": 2.0, "late_mean": 3.0}
+
     tr = pm.sample(1000, tune=500, start=start)
     pm.traceplot(tr)
diff --git a/pymc3/examples/disaster_model_theano_op.py b/pymc3/examples/disaster_model_theano_op.py
index de11b57079..ae3115a678 100644
--- a/pymc3/examples/disaster_model_theano_op.py
+++ b/pymc3/examples/disaster_model_theano_op.py
@@ -10,17 +10,131 @@
 import theano.tensor as tt
 from numpy import arange, array, empty
 
-__all__ = ['disasters_data', 'switchpoint', 'early_mean', 'late_mean', 'rate',
-           'disasters']
+__all__ = [
+    "disasters_data",
+    "switchpoint",
+    "early_mean",
+    "late_mean",
+    "rate",
+    "disasters",
+]
 
 # Time series of recorded coal mining disasters in the UK from 1851 to 1962
-disasters_data = array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
-                        3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
-                        2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
-                        1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
-                        0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
-                        3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
-                        0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
+disasters_data = array(
+    [
+        4,
+        5,
+        4,
+        0,
+        1,
+        4,
+        3,
+        4,
+        0,
+        6,
+        3,
+        3,
+        4,
+        0,
+        2,
+        6,
+        3,
+        3,
+        5,
+        4,
+        5,
+        3,
+        1,
+        4,
+        4,
+        1,
+        5,
+        5,
+        3,
+        4,
+        2,
+        5,
+        2,
+        2,
+        3,
+        4,
+        2,
+        1,
+        3,
+        2,
+        2,
+        1,
+        1,
+        1,
+        1,
+        3,
+        0,
+        0,
+        1,
+        0,
+        1,
+        1,
+        0,
+        0,
+        3,
+        1,
+        0,
+        3,
+        2,
+        2,
+        0,
+        1,
+        1,
+        1,
+        0,
+        1,
+        0,
+        1,
+        0,
+        0,
+        0,
+        2,
+        1,
+        0,
+        0,
+        0,
+        1,
+        1,
+        0,
+        2,
+        3,
+        3,
+        1,
+        1,
+        2,
+        1,
+        1,
+        1,
+        1,
+        2,
+        4,
+        2,
+        0,
+        0,
+        1,
+        4,
+        0,
+        0,
+        0,
+        1,
+        0,
+        0,
+        0,
+        0,
+        0,
+        1,
+        0,
+        0,
+        1,
+        0,
+        1,
+    ]
+)
 years = len(disasters_data)
 
 
@@ -35,10 +149,10 @@ def rate_(switchpoint, early_mean, late_mean):
 with pm.Model() as model:
 
     # Prior for distribution of switchpoint location
-    switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years)
+    switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
     # Priors for pre- and post-switch mean number of disasters
-    early_mean = pm.Exponential('early_mean', lam=1.)
-    late_mean = pm.Exponential('late_mean', lam=1.)
+    early_mean = pm.Exponential("early_mean", lam=1.0)
+    late_mean = pm.Exponential("late_mean", lam=1.0)
 
     # Allocate appropriate Poisson rates to years before and after current
     # switchpoint location
@@ -46,7 +160,7 @@ def rate_(switchpoint, early_mean, late_mean):
     rate = rate_(switchpoint, early_mean, late_mean)
 
     # Data likelihood
-    disasters = pm.Poisson('disasters', rate, observed=disasters_data)
+    disasters = pm.Poisson("disasters", rate, observed=disasters_data)
 
     # Use slice sampler for means
     step1 = pm.Slice([early_mean, late_mean])
@@ -54,7 +168,7 @@ def rate_(switchpoint, early_mean, late_mean):
     step2 = pm.Metropolis([switchpoint])
 
     # Initial values for stochastic nodes
-    start = {'early_mean': 2., 'late_mean': 3.}
+    start = {"early_mean": 2.0, "late_mean": 3.0}
 
     tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], cores=2)
     pm.traceplot(tr)
diff --git a/pymc3/examples/factor_potential.py b/pymc3/examples/factor_potential.py
index ccb1c9251c..078c0eb27f 100644
--- a/pymc3/examples/factor_potential.py
+++ b/pymc3/examples/factor_potential.py
@@ -7,12 +7,14 @@
 STAN.
 """
 
+
 def build_model():
     with pm.Model() as model:
-        x = pm.Normal('x', 1, 1)
-        x2 = pm.Potential('x2', -x ** 2)
+        x = pm.Normal("x", 1, 1)
+        x2 = pm.Potential("x2", -x ** 2)
     return model
 
+
 def run(n=1000):
     model = build_model()
     if n == "short":
@@ -20,5 +22,6 @@ def run(n=1000):
     with model:
         pm.sample(n)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/garch_example.py b/pymc3/examples/garch_example.py
index d530e76011..cd8516f5d1 100644
--- a/pymc3/examples/garch_example.py
+++ b/pymc3/examples/garch_example.py
@@ -38,12 +38,11 @@ def get_garch_model():
     shape = r.shape
 
     with Model() as garch:
-        alpha1 = Uniform('alpha1', 0., 1., shape=shape)
-        beta1 = Uniform('beta1', 0., 1 - alpha1, shape=shape)
-        mu = Normal('mu', mu=0., sd=100., shape=shape)
-        theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) +
-                        beta1 * tt.pow(sigma1, 2))
-        Normal('obs', mu, sd=theta, observed=r)
+        alpha1 = Uniform("alpha1", 0.0, 1.0, shape=shape)
+        beta1 = Uniform("beta1", 0.0, 1 - alpha1, shape=shape)
+        mu = Normal("mu", mu=0.0, sd=100.0, shape=shape)
+        theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) + beta1 * tt.pow(sigma1, 2))
+        Normal("obs", mu, sd=theta, observed=r)
     return garch
 
 
@@ -55,5 +54,5 @@ def run(n=1000):
     return tr
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     summary(run())
diff --git a/pymc3/examples/gelman_bioassay.py b/pymc3/examples/gelman_bioassay.py
index 69d7ca1118..2de90d146c 100644
--- a/pymc3/examples/gelman_bioassay.py
+++ b/pymc3/examples/gelman_bioassay.py
@@ -4,19 +4,19 @@
 # Samples for each dose level
 n = 5 * ones(4, dtype=int)
 # Log-dose
-dose = array([-.86, -.3, -.05, .73])
+dose = array([-0.86, -0.3, -0.05, 0.73])
 
 with pm.Model() as model:
 
     # Logit-linear model parameters
-    alpha = pm.Normal('alpha', 0, sd=100.)
-    beta = pm.Normal('beta', 0, sd=1.)
+    alpha = pm.Normal("alpha", 0, sd=100.0)
+    beta = pm.Normal("beta", 0, sd=1.0)
 
     # Calculate probabilities of death
-    theta = pm.Deterministic('theta', pm.math.invlogit(alpha + beta * dose))
+    theta = pm.Deterministic("theta", pm.math.invlogit(alpha + beta * dose))
 
     # Data likelihood
-    deaths = pm.Binomial('deaths', n=n, p=theta, observed=[0, 1, 3, 5])
+    deaths = pm.Binomial("deaths", n=n, p=theta, observed=[0, 1, 3, 5])
 
 
 def run(n=1000):
@@ -25,5 +25,6 @@ def run(n=1000):
     with model:
         pm.sample(n, tune=1000)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/gelman_schools.py b/pymc3/examples/gelman_schools.py
index 0331f81682..4ba7c10136 100644
--- a/pymc3/examples/gelman_schools.py
+++ b/pymc3/examples/gelman_schools.py
@@ -1,7 +1,7 @@
 from pymc3 import HalfCauchy, Normal, sample, Model, loo
 import numpy as np
 
-'''Original Stan model
+"""Original Stan model
 
 data {
   int<lower=0> J; // number of schools
@@ -22,21 +22,21 @@
   eta ~ normal(0, 1);
   y ~ normal(theta, sigma);
 }
-'''
+"""
 
 J = 8
-y = np.array([28,  8, -3,  7, -1,  1, 18, 12])
-sigma = np.array([15, 10, 16, 11,  9, 11, 10, 18])
+y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
+sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])
 
 with Model() as schools:
 
-    eta = Normal('eta', 0, 1, shape=J)
-    mu = Normal('mu', 0, sd=1e6)
-    tau = HalfCauchy('tau', 25)
+    eta = Normal("eta", 0, 1, shape=J)
+    mu = Normal("mu", 0, sd=1e6)
+    tau = HalfCauchy("tau", 25)
 
     theta = mu + tau * eta
 
-    obs = Normal('obs', theta, sd=sigma, observed=y)
+    obs = Normal("obs", theta, sd=sigma, observed=y)
 
 
 def run(n=1000):
@@ -46,5 +46,6 @@ def run(n=1000):
         tr = sample(n)
         loo(tr)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/lasso_missing.py b/pymc3/examples/lasso_missing.py
index 3bf42ede9f..3929a34930 100644
--- a/pymc3/examples/lasso_missing.py
+++ b/pymc3/examples/lasso_missing.py
@@ -3,53 +3,72 @@
 from numpy.ma import masked_values
 
 # Import data, filling missing values with sentinels (-999)
-test_scores = pd.read_csv(pm.get_data('test_scores.csv')).fillna(-999)
+test_scores = pd.read_csv(pm.get_data("test_scores.csv")).fillna(-999)
 
 # Extract variables: test score, gender, number of siblings, previous disability, age,
 # mother with HS education or better, hearing loss identified by 3 months
 # of age
-(score, male, siblings, disability,
-    age, mother_hs, early_ident) = test_scores[['score', 'male', 'siblings',
-                                                'prev_disab', 'age_test',
-                                                'mother_hs', 'early_ident']].astype(float).values.T
+(score, male, siblings, disability, age, mother_hs, early_ident) = (
+    test_scores[
+        [
+            "score",
+            "male",
+            "siblings",
+            "prev_disab",
+            "age_test",
+            "mother_hs",
+            "early_ident",
+        ]
+    ]
+    .astype(float)
+    .values.T
+)
 
 with pm.Model() as model:
     # Impute missing values
-    sib_mean = pm.Exponential('sib_mean', 1.)
-    siblings_imp = pm.Poisson('siblings_imp', sib_mean,
-                              observed=siblings)
+    sib_mean = pm.Exponential("sib_mean", 1.0)
+    siblings_imp = pm.Poisson("siblings_imp", sib_mean, observed=siblings)
 
-    p_disab = pm.Beta('p_disab', 1., 1.)
+    p_disab = pm.Beta("p_disab", 1.0, 1.0)
     disability_imp = pm.Bernoulli(
-        'disability_imp', p_disab, observed=masked_values(disability, value=-999))
+        "disability_imp", p_disab, observed=masked_values(disability, value=-999)
+    )
 
-    p_mother = pm.Beta('p_mother', 1., 1.)
-    mother_imp = pm.Bernoulli('mother_imp', p_mother,
-                              observed=masked_values(mother_hs, value=-999))
+    p_mother = pm.Beta("p_mother", 1.0, 1.0)
+    mother_imp = pm.Bernoulli(
+        "mother_imp", p_mother, observed=masked_values(mother_hs, value=-999)
+    )
 
-    s = pm.HalfCauchy('s', 5., testval=5)
-    beta = pm.Laplace('beta', 0., 100., shape=7, testval=.1)
+    s = pm.HalfCauchy("s", 5.0, testval=5)
+    beta = pm.Laplace("beta", 0.0, 100.0, shape=7, testval=0.1)
 
-    expected_score = (beta[0] + beta[1] * male + beta[2] * siblings_imp + beta[3] * disability_imp +
-                      beta[4] * age + beta[5] * mother_imp + beta[6] * early_ident)
+    expected_score = (
+        beta[0]
+        + beta[1] * male
+        + beta[2] * siblings_imp
+        + beta[3] * disability_imp
+        + beta[4] * age
+        + beta[5] * mother_imp
+        + beta[6] * early_ident
+    )
 
-    observed_score = pm.Normal(
-        'observed_score', expected_score, s, observed=score)
+    observed_score = pm.Normal("observed_score", expected_score, s, observed=score)
 
 
 with model:
     start = pm.find_MAP()
     step1 = pm.NUTS([beta, s, p_disab, p_mother, sib_mean], scaling=start)
-    step2 = pm.BinaryGibbsMetropolis([mother_imp.missing_values,
-                                      disability_imp.missing_values])
+    step2 = pm.BinaryGibbsMetropolis(
+        [mother_imp.missing_values, disability_imp.missing_values]
+    )
 
 
 def run(n=5000):
-    if n == 'short':
+    if n == "short":
         n = 100
     with model:
         pm.sample(n, step=[step1, step2], start=start)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/lightspeed_example.py b/pymc3/examples/lightspeed_example.py
index 23c09f728b..d17a2acc99 100644
--- a/pymc3/examples/lightspeed_example.py
+++ b/pymc3/examples/lightspeed_example.py
@@ -1,11 +1,76 @@
 import numpy as np
 import pymc3 as pm
 
-light_speed = np.array([28, 26, 33, 24, 34, -44, 27, 16, 40, -2, 29, 22, 24, 21, 25,
-                        30, 23, 29, 31, 19, 24, 20, 36, 32, 36, 28, 25, 21, 28, 29,
-                        37, 25, 28, 26, 30, 32, 36, 26, 30, 22, 36, 23, 27, 27, 28,
-                        27, 31, 27, 26, 33, 26, 32, 32, 24, 39, 28, 24, 25, 32, 25,
-                        29, 27, 28, 29, 16, 23])
+light_speed = np.array(
+    [
+        28,
+        26,
+        33,
+        24,
+        34,
+        -44,
+        27,
+        16,
+        40,
+        -2,
+        29,
+        22,
+        24,
+        21,
+        25,
+        30,
+        23,
+        29,
+        31,
+        19,
+        24,
+        20,
+        36,
+        32,
+        36,
+        28,
+        25,
+        21,
+        28,
+        29,
+        37,
+        25,
+        28,
+        26,
+        30,
+        32,
+        36,
+        26,
+        30,
+        22,
+        36,
+        23,
+        27,
+        27,
+        28,
+        27,
+        31,
+        27,
+        26,
+        33,
+        26,
+        32,
+        32,
+        24,
+        39,
+        28,
+        24,
+        25,
+        32,
+        25,
+        29,
+        27,
+        28,
+        29,
+        16,
+        23,
+    ]
+)
 
 model_1 = pm.Model()
 
@@ -15,13 +80,15 @@
     # sigma = pm.Uniform('sigma', lower = 0, upper= np.inf)
 
     # using vague priors works
-    mu = pm.Uniform('mu', lower=light_speed.std() / 1000.0,
-                    upper=light_speed.std() * 1000.0)
-    sigma = pm.Uniform('sigma', lower=light_speed.std() /
-                       1000.0, upper=light_speed.std() * 1000.0)
+    mu = pm.Uniform(
+        "mu", lower=light_speed.std() / 1000.0, upper=light_speed.std() * 1000.0
+    )
+    sigma = pm.Uniform(
+        "sigma", lower=light_speed.std() / 1000.0, upper=light_speed.std() * 1000.0
+    )
 
     # define likelihood
-    y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=light_speed)
+    y_obs = pm.Normal("Y_obs", mu=mu, sd=sigma, observed=light_speed)
 
 
 def run(n=5000):
@@ -31,5 +98,5 @@ def run(n=5000):
         pm.summary(trace)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/rankdata_ordered.py b/pymc3/examples/rankdata_ordered.py
index b7fd0000b4..8edea3eac6 100644
--- a/pymc3/examples/rankdata_ordered.py
+++ b/pymc3/examples/rankdata_ordered.py
@@ -22,39 +22,41 @@
 
 
 with pm.Model() as m:
-    mu_hat = pm.Normal('mu_hat', 0, 1, shape=K-1)
+    mu_hat = pm.Normal("mu_hat", 0, 1, shape=K - 1)
     # set first value to 0 to avoid unidentified model
-    mu = tt.concatenate([[0.], mu_hat])
+    mu = tt.concatenate([[0.0], mu_hat])
     # sd = pm.HalfCauchy('sigma', 1.)
-    latent = pm.Normal('latent',
-                       mu=mu[y_argsort],
-                       sd=1.,  # using sd does not work yet
-                       transform=pm.distributions.transforms.ordered,
-                       shape=y_argsort.shape,
-                       testval=np.repeat(np.arange(K)[:,None], J, axis=1).T)
-                       # There are some problems using Ordered
-                       # right now, you need to specify testval
+    latent = pm.Normal(
+        "latent",
+        mu=mu[y_argsort],
+        sd=1.0,  # using sd does not work yet
+        transform=pm.distributions.transforms.ordered,
+        shape=y_argsort.shape,
+        testval=np.repeat(np.arange(K)[:, None], J, axis=1).T,
+    )
+    # There are some problems using Ordered right now,
+    # so you need to specify testval (as done above)
 
 
 def run(n=1500):
-    if n == 'short':
+    if n == "short":
         n = 50
 
     with m:
         trace = pm.sample(n)
 
-    pm.traceplot(trace, varnames=['mu_hat'])
+    pm.traceplot(trace, varnames=["mu_hat"])
 
-    print('Example observed data: ')
+    print("Example observed data: ")
     print(y[:30, :].T)
-    print('The true ranking is: ')
+    print("The true ranking is: ")
     print(yreal.flatten())
-    print('The Latent mean is: ')
-    latentmu = np.hstack(([0], pm.summary(trace, varnames=['mu_hat'])['mean'].values))
+    print("The Latent mean is: ")
+    latentmu = np.hstack(([0], pm.summary(trace, varnames=["mu_hat"])["mean"].values))
     print(np.round(latentmu, 2))
-    print('The estimated ranking is: ')
+    print("The estimated ranking is: ")
     print(np.argsort(latentmu))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     run()
diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py
index 14b87d7958..89f87200c8 100644
--- a/pymc3/examples/samplers_mvnormal.py
+++ b/pymc3/examples/samplers_mvnormal.py
@@ -19,6 +19,7 @@
 # different behaviour with respect to blocking.
 USE_XY = True
 
+
 def run(steppers, p):
     steppers = set(steppers)
     traces = {}
@@ -27,52 +28,46 @@ def run(steppers, p):
 
     with pm.Model() as model:
         if USE_XY:
-            x = pm.Flat('x')
-            y = pm.Flat('y')
-            mu = np.array([0.,0.])
-            cov = np.array([[1.,p],[p,1.]])
-            z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x,y]))
-            pot = pm.Potential('logp_xy', z)
-            start = {'x': 0, 'y': 0}
+            x = pm.Flat("x")
+            y = pm.Flat("y")
+            mu = np.array([0.0, 0.0])
+            cov = np.array([[1.0, p], [p, 1.0]])
+            z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x, y]))
+            pot = pm.Potential("logp_xy", z)
+            start = {"x": 0, "y": 0}
         else:
-            mu = np.array([0.,0.])
-            cov = np.array([[1.,p],[p,1.]])
-            z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,))
-            start={'z': [0, 0]}
+            mu = np.array([0.0, 0.0])
+            cov = np.array([[1.0, p], [p, 1.0]])
+            z = pm.MvNormal("z", mu=mu, cov=cov, shape=(2,))
+            start = {"z": [0, 0]}
 
         for step_cls in steppers:
             name = step_cls.__name__
             t_start = time.time()
             mt = pm.sample(
-                draws=10000,
-                chains=16, parallelize=False,
-                step=step_cls(),
-                start=start
+                draws=10000, chains=16, parallelize=False, step=step_cls(), start=start
             )
             runtimes[name] = time.time() - t_start
-            print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains))
+            print(
+                "{} samples across {} chains".format(len(mt) * mt.nchains, mt.nchains)
+            )
             traces[name] = mt
             en = pm.diagnostics.effective_n(mt)
-            print('effective: {}\r\n'.format(en))
+            print("effective: {}\r\n".format(en))
             if USE_XY:
-                effn[name] = np.mean(en['x']) / len(mt) / mt.nchains
+                effn[name] = np.mean(en["x"]) / len(mt) / mt.nchains
             else:
-                effn[name] = np.mean(en['z']) / len(mt) / mt.nchains
+                effn[name] = np.mean(en["z"]) / len(mt) / mt.nchains
     return traces, effn, runtimes
 
 
-if __name__ == '__main__':
-    methods = [
-        pm.Metropolis,
-        pm.Slice,
-        pm.NUTS,
-        pm.DEMetropolis
-    ]
+if __name__ == "__main__":
+    methods = [pm.Metropolis, pm.Slice, pm.NUTS, pm.DEMetropolis]
     names = [c.__name__ for c in methods]
 
-    df_base = pd.DataFrame(columns=['p'] + names)
-    df_base['p'] = [.0,.9]
-    df_base = df_base.set_index('p')
+    df_base = pd.DataFrame(columns=["p"] + names)
+    df_base["p"] = [0.0, 0.9]
+    df_base = df_base.set_index("p")
 
     df_effectiven = df_base.copy()
     df_runtime = df_base.copy()
@@ -85,16 +80,16 @@ def run(steppers, p):
             df_runtime.set_value(p, name, runtime[name])
             df_performance.set_value(p, name, rate[name] / runtime[name])
 
-    print('\r\nEffective sample size [0...1]')
-    print(df_effectiven.T.to_string(float_format='{:.3f}'.format))
+    print("\r\nEffective sample size [0...1]")
+    print(df_effectiven.T.to_string(float_format="{:.3f}".format))
 
-    print('\r\nRuntime [s]')
-    print(df_runtime.T.to_string(float_format='{:.1f}'.format))
+    print("\r\nRuntime [s]")
+    print(df_runtime.T.to_string(float_format="{:.1f}".format))
 
-    if 'NUTS' in names:
-        print('\r\nNormalized effective sampling rate [0...1]')
-        df_performance = df_performance.T / df_performance.loc[0]['NUTS']
+    if "NUTS" in names:
+        print("\r\nNormalized effective sampling rate [0...1]")
+        df_performance = df_performance.T / df_performance.loc[0]["NUTS"]
     else:
-        print('\r\nNormalized effective sampling rate [1/s]')
+        print("\r\nNormalized effective sampling rate [1/s]")
         df_performance = df_performance.T
-    print(df_performance.to_string(float_format='{:.3f}'.format))
+    print(df_performance.to_string(float_format="{:.3f}".format))
diff --git a/pymc3/examples/simpletest.py b/pymc3/examples/simpletest.py
index d67f63176b..2f83391bb1 100644
--- a/pymc3/examples/simpletest.py
+++ b/pymc3/examples/simpletest.py
@@ -3,15 +3,15 @@
 
 # import pydevd
 # pydevd.set_pm_excepthook()
-np.seterr(invalid='raise')
+np.seterr(invalid="raise")
 
 data = np.random.normal(size=(2, 20))
 
 
 with pm.Model() as model:
-    x = pm.Normal('x', mu=.5, sd=2., shape=(2, 1))
-    z = pm.Beta('z', alpha=10, beta=5.5)
-    d = pm.Normal('data', mu=x, sd=.75, observed=data)
+    x = pm.Normal("x", mu=0.5, sd=2.0, shape=(2, 1))
+    z = pm.Beta("z", alpha=10, beta=5.5)
+    d = pm.Normal("data", mu=x, sd=0.75, observed=data)
 
 
 def run(n=1000):
@@ -19,7 +19,8 @@ def run(n=1000):
         n = 50
     with model:
         trace = pm.sample(n)
-    pm.traceplot(trace, varnames=['x'])
+    pm.traceplot(trace, varnames=["x"])
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     run()
diff --git a/pymc3/exceptions.py b/pymc3/exceptions.py
index fd9afd76ea..7e58f34c06 100644
--- a/pymc3/exceptions.py
+++ b/pymc3/exceptions.py
@@ -1,4 +1,4 @@
-__all__ = ['SamplingError']
+__all__ = ["SamplingError"]
 
 
 class SamplingError(RuntimeError):
diff --git a/pymc3/glm/families.py b/pymc3/glm/families.py
index 4a15339993..2dfc0a0902 100644
--- a/pymc3/glm/families.py
+++ b/pymc3/glm/families.py
@@ -6,7 +6,7 @@
 from ..model import modelcontext
 from .. import distributions as pm_dists
 
-__all__ = ['Normal', 'StudentT', 'Binomial', 'Poisson', 'NegativeBinomial']
+__all__ = ["Normal", "StudentT", "Binomial", "Poisson", "NegativeBinomial"]
 
 # Define link functions
 
@@ -14,11 +14,11 @@
 # it as a method.
 
 
-class Identity():
-
+class Identity:
     def __call__(self, x):
         return x
 
+
 identity = Identity()
 logit = tt.nnet.sigmoid
 inverse = tt.inv
@@ -28,19 +28,20 @@ def __call__(self, x):
 class Family(object):
     """Base class for Family of likelihood distribution and link functions.
     """
+
     priors = {}
     link = None
 
     def __init__(self, **kwargs):
         # Overwrite defaults
         for key, val in kwargs.items():
-            if key == 'priors':
+            if key == "priors":
                 self.priors = copy(self.priors)
                 self.priors.update(val)
             else:
                 setattr(self, key, val)
 
-    def _get_priors(self, model=None, name=''):
+    def _get_priors(self, model=None, name=""):
         """Return prior distributions of the likelihood.
 
         Returns
@@ -48,14 +49,14 @@ def _get_priors(self, model=None, name=''):
         dict : mapping name -> pymc3 distribution
         """
         if name:
-            name = '{}_'.format(name)
+            name = "{}_".format(name)
         model = modelcontext(model)
         priors = {}
         for key, val in self.priors.items():
             if isinstance(val, (numbers.Number, np.ndarray, np.generic)):
                 priors[key] = val
             else:
-                priors[key] = model.Var('{}{}'.format(name, key), val)
+                priors[key] = model.Var("{}{}".format(name, key), val)
 
         return priors
 
@@ -73,48 +74,55 @@ def create_likelihood(self, name, y_est, y_data, model=None):
         # Wrap y_est in link function
         priors[self.parent] = self.link(y_est)
         if name:
-            name = '{}_'.format(name)
-        return self.likelihood('{}y'.format(name), observed=y_data, **priors)
+            name = "{}_".format(name)
+        return self.likelihood("{}y".format(name), observed=y_data, **priors)
 
     def __repr__(self):
         return """Family {klass}:
     Likelihood   : {likelihood}({parent})
     Priors       : {priors}
-    Link function: {link}.""".format(klass=self.__class__, likelihood=self.likelihood.__name__, parent=self.parent, priors=self.priors, link=self.link)
+    Link function: {link}.""".format(
+            klass=self.__class__,
+            likelihood=self.likelihood.__name__,
+            parent=self.parent,
+            priors=self.priors,
+            link=self.link,
+        )
 
 
 class StudentT(Family):
     link = identity
     likelihood = pm_dists.StudentT
-    parent = 'mu'
-    priors = {'lam': pm_dists.HalfCauchy.dist(beta=10, testval=1.),
-              'nu': 1}
+    parent = "mu"
+    priors = {"lam": pm_dists.HalfCauchy.dist(beta=10, testval=1.0), "nu": 1}
 
 
 class Normal(Family):
     link = identity
     likelihood = pm_dists.Normal
-    parent = 'mu'
-    priors = {'sd': pm_dists.HalfCauchy.dist(beta=10, testval=1.)}
+    parent = "mu"
+    priors = {"sd": pm_dists.HalfCauchy.dist(beta=10, testval=1.0)}
 
 
 class Binomial(Family):
     link = logit
     likelihood = pm_dists.Binomial
-    parent = 'p'
-    priors = {'n': 1}
+    parent = "p"
+    priors = {"n": 1}
 
 
 class Poisson(Family):
     link = exp
     likelihood = pm_dists.Poisson
-    parent = 'mu'
-    priors = {'mu': pm_dists.HalfCauchy.dist(beta=10, testval=1.)}
+    parent = "mu"
+    priors = {"mu": pm_dists.HalfCauchy.dist(beta=10, testval=1.0)}
 
 
 class NegativeBinomial(Family):
     link = exp
     likelihood = pm_dists.NegativeBinomial
-    parent = 'mu'
-    priors = {'mu': pm_dists.HalfCauchy.dist(beta=10, testval=1.),
-              'alpha': pm_dists.HalfCauchy.dist(beta=10, testval=1.)}
+    parent = "mu"
+    priors = {
+        "mu": pm_dists.HalfCauchy.dist(beta=10, testval=1.0),
+        "alpha": pm_dists.HalfCauchy.dist(beta=10, testval=1.0),
+    }
diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py
index f795f1909c..dc0bd468f5 100644
--- a/pymc3/glm/linear.py
+++ b/pymc3/glm/linear.py
@@ -6,10 +6,7 @@
 from .utils import any_to_tensor_and_labels
 
 
-__all__ = [
-    'LinearComponent',
-    'GLM'
-]
+__all__ = ["LinearComponent", "GLM"]
 
 
 class LinearComponent(Model):
@@ -32,11 +29,22 @@ class LinearComponent(Model):
         this can be used to specify an a priori known component to be
         included in the linear predictor during fitting.
     """
-    default_regressor_prior = Normal.dist(mu=0, tau=1.0E-6)
+
+    default_regressor_prior = Normal.dist(mu=0, tau=1.0e-6)
     default_intercept_prior = Flat.dist()
 
-    def __init__(self, x, y, intercept=True, labels=None,
-                 priors=None, vars=None, name='', model=None, offset=0.):
+    def __init__(
+        self,
+        x,
+        y,
+        intercept=True,
+        labels=None,
+        priors=None,
+        vars=None,
+        name="",
+        model=None,
+        offset=0.0,
+    ):
         super(LinearComponent, self).__init__(name, model)
         if priors is None:
             priors = {}
@@ -45,23 +53,16 @@ def __init__(self, x, y, intercept=True, labels=None,
         x, labels = any_to_tensor_and_labels(x, labels)
         # now we have x, shape and labels
         if intercept:
-            x = tt.concatenate(
-                [tt.ones((x.shape[0], 1), x.dtype), x],
-                axis=1
-            )
-            labels = ['Intercept'] + labels
+            x = tt.concatenate([tt.ones((x.shape[0], 1), x.dtype), x], axis=1)
+            labels = ["Intercept"] + labels
         coeffs = list()
         for name in labels:
-            if name == 'Intercept':
+            if name == "Intercept":
                 if name in vars:
                     v = Deterministic(name, vars[name])
                 else:
                     v = self.Var(
-                        name=name,
-                        dist=priors.get(
-                            name,
-                            self.default_intercept_prior
-                        )
+                        name=name, dist=priors.get(name, self.default_intercept_prior)
                     )
                 coeffs.append(v)
             else:
@@ -71,26 +72,32 @@ def __init__(self, x, y, intercept=True, labels=None,
                     v = self.Var(
                         name=name,
                         dist=priors.get(
-                            name,
-                            priors.get(
-                                'Regressor',
-                                self.default_regressor_prior
-                            )
-                        )
+                            name, priors.get("Regressor", self.default_regressor_prior)
+                        ),
                     )
                 coeffs.append(v)
         self.coeffs = tt.stack(coeffs, axis=0)
         self.y_est = x.dot(self.coeffs) + offset
 
     @classmethod
-    def from_formula(cls, formula, data, priors=None, vars=None,
-                     name='', model=None, offset=0.):
+    def from_formula(
+        cls, formula, data, priors=None, vars=None, name="", model=None, offset=0.0
+    ):
         import patsy
+
         y, x = patsy.dmatrices(formula, data)
         labels = x.design_info.column_names
-        return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
-                   labels=labels, priors=priors, vars=vars, name=name,
-                   model=model, offset=offset)
+        return cls(
+            np.asarray(x),
+            np.asarray(y)[:, -1],
+            intercept=False,
+            labels=labels,
+            priors=priors,
+            vars=vars,
+            name=name,
+            model=model,
+            offset=offset,
+        )
 
 
 class GLM(LinearComponent):
@@ -115,13 +122,30 @@ class GLM(LinearComponent):
         this can be used to specify an a priori known component to be
         included in the linear predictor during fitting.
     """
-    def __init__(self, x, y, intercept=True, labels=None,
-                 priors=None, vars=None, family='normal', name='',
-                 model=None, offset=0.):
+
+    def __init__(
+        self,
+        x,
+        y,
+        intercept=True,
+        labels=None,
+        priors=None,
+        vars=None,
+        family="normal",
+        name="",
+        model=None,
+        offset=0.0,
+    ):
         super(GLM, self).__init__(
-            x, y, intercept=intercept, labels=labels,
-            priors=priors, vars=vars, name=name, 
-            model=model, offset=offset
+            x,
+            y,
+            intercept=intercept,
+            labels=labels,
+            priors=priors,
+            vars=vars,
+            name=name,
+            model=model,
+            offset=offset,
         )
 
         _families = dict(
@@ -134,19 +158,37 @@ def __init__(self, x, y, intercept=True, labels=None,
         if isinstance(family, str):
             family = _families[family]()
         self.y_est = family.create_likelihood(
-            name='', y_est=self.y_est,
-            y_data=y, model=self)
+            name="", y_est=self.y_est, y_data=y, model=self
+        )
 
     @classmethod
-    def from_formula(cls, formula, data, priors=None,
-                     vars=None, family='normal', name='',
-                     model=None, offset=0.):
+    def from_formula(
+        cls,
+        formula,
+        data,
+        priors=None,
+        vars=None,
+        family="normal",
+        name="",
+        model=None,
+        offset=0.0,
+    ):
         import patsy
+
         y, x = patsy.dmatrices(formula, data)
         labels = x.design_info.column_names
-        return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
-                   labels=labels, priors=priors, vars=vars, family=family,
-                   name=name, model=model, offset=offset)
+        return cls(
+            np.asarray(x),
+            np.asarray(y)[:, -1],
+            intercept=False,
+            labels=labels,
+            priors=priors,
+            vars=vars,
+            family=family,
+            name=name,
+            model=model,
+            offset=offset,
+        )
 
 
 glm = GLM
diff --git a/pymc3/glm/utils.py b/pymc3/glm/utils.py
index f2d8393f4e..638e5458ee 100644
--- a/pymc3/glm/utils.py
+++ b/pymc3/glm/utils.py
@@ -72,7 +72,7 @@ def any_to_tensor_and_labels(x, labels=None):
     elif not isinstance(x, tt.Variable):
         x = np.asarray(x)
         if x.ndim == 0:
-            raise ValueError('Cannot use scalars')
+            raise ValueError("Cannot use scalars")
         elif x.ndim == 1:
             x = x[:, None]
     # something really strange goes here,
@@ -81,28 +81,26 @@ def any_to_tensor_and_labels(x, labels=None):
     elif labels is not None:
         x = tt.as_tensor_variable(x)
         if x.ndim == 0:
-            raise ValueError('Cannot use scalars')
+            raise ValueError("Cannot use scalars")
         elif x.ndim == 1:
             x = x[:, None]
-    else:   # trust input
+    else:  # trust input
         pass
     # we should check that we can extract labels
     if labels is None and not isinstance(x, tt.Variable):
-        labels = ['x%d' % i for i in range(x.shape[1])]
+        labels = ["x%d" % i for i in range(x.shape[1])]
     # for theano variables we should have labels from user
     elif labels is None:
-        raise ValueError('Please provide labels as '
-                         'we cannot infer shape of input')
-    else:   # trust labels, user knows what he is doing
+        raise ValueError("Please provide labels as " "we cannot infer shape of input")
+    else:  # trust labels, user knows what he is doing
         pass
     # it's time to check shapes if we can
     if not isinstance(x, tt.Variable):
         if not len(labels) == x.shape[1]:
             raise ValueError(
-                'Please provide full list '
-                'of labels for coefficients, '
-                'got len(labels)=%d instead of %d'
-                % (len(labels), x.shape[1])
+                "Please provide full list "
+                "of labels for coefficients, "
+                "got len(labels)=%d instead of %d" % (len(labels), x.shape[1])
             )
     else:
         # trust labels, as we raised an
@@ -110,7 +108,7 @@ def any_to_tensor_and_labels(x, labels=None):
         pass
     # convert labels to list
     if isinstance(labels, pd.RangeIndex):
-        labels = ['x%d' % i for i in labels]
+        labels = ["x%d" % i for i in labels]
     # maybe it was a tuple ot whatever
     elif not isinstance(labels, list):
         labels = list(labels)
@@ -119,7 +117,7 @@ def any_to_tensor_and_labels(x, labels=None):
         x = tt.as_tensor_variable(x)
         # finally check dimensions
         if x.ndim == 0:
-            raise ValueError('Cannot use scalars')
+            raise ValueError("Cannot use scalars")
         elif x.ndim == 1:
             x = x[:, None]
     return x, labels
diff --git a/pymc3/gp/cov.py b/pymc3/gp/cov.py
index 5339548666..80ac95ce1b 100644
--- a/pymc3/gp/cov.py
+++ b/pymc3/gp/cov.py
@@ -3,26 +3,28 @@
 from functools import reduce
 from operator import mul, add
 
-__all__ = ['Constant',
-           'WhiteNoise',
-           'ExpQuad',
-           'RatQuad',
-           'Exponential',
-           'Matern52',
-           'Matern32',
-           'Linear',
-           'Polynomial',
-           'Cosine',
-           'Periodic',
-           'WarpedInput',
-           'Gibbs',
-           'Coregion',
-           'ScaledCov',
-           'Kron']
+__all__ = [
+    "Constant",
+    "WhiteNoise",
+    "ExpQuad",
+    "RatQuad",
+    "Exponential",
+    "Matern52",
+    "Matern32",
+    "Linear",
+    "Polynomial",
+    "Cosine",
+    "Periodic",
+    "WarpedInput",
+    "Gibbs",
+    "Coregion",
+    "ScaledCov",
+    "Kron",
+]
 
 
 class Covariance(object):
-    R"""
+    r"""
     Base class for all kernels/covariance functions.
 
     Parameters
@@ -43,7 +45,7 @@ def __init__(self, input_dim, active_dims=None):
             self.active_dims = np.asarray(active_dims, np.int)
 
     def __call__(self, X, Xs=None, diag=False):
-        R"""
+        r"""
         Evaluate the kernel/covariance function.
 
         Parameters
@@ -103,8 +105,13 @@ def __array_wrap__(self, result):
 
 class Combination(Covariance):
     def __init__(self, factor_list):
-        input_dim = max([factor.input_dim for factor in factor_list
-                             if isinstance(factor, Covariance)])
+        input_dim = max(
+            [
+                factor.input_dim
+                for factor in factor_list
+                if isinstance(factor, Covariance)
+            ]
+        )
         super(Combination, self).__init__(input_dim=input_dim)
         self.factor_list = []
         for factor in factor_list:
@@ -124,9 +131,14 @@ def merge_factors(self, X, Xs=None, diag=False):
                     factor_list.append(np.diag(factor))
                 else:
                     factor_list.append(factor)
-            elif isinstance(factor, (tt.TensorConstant,
-                                     tt.TensorVariable,
-                                     tt.sharedvar.TensorSharedVariable)):
+            elif isinstance(
+                factor,
+                (
+                    tt.TensorConstant,
+                    tt.TensorVariable,
+                    tt.sharedvar.TensorSharedVariable,
+                ),
+            ):
                 if factor.ndim == 2 and diag:
                     factor_list.append(tt.diag(factor))
                 else:
@@ -147,7 +159,7 @@ def __call__(self, X, Xs=None, diag=False):
 
 
 class Kron(Covariance):
-    R"""Form a covariance object that is the kronecker product of other covariances.
+    r"""Form a covariance object that is the kronecker product of other covariances.
 
     In contrast to standard multiplication, where each covariance is given the
     same inputs X and Xs, kronecker product covariances first split the inputs
@@ -179,13 +191,14 @@ def _split(self, X, Xs):
 
     def __call__(self, X, Xs=None, diag=False):
         X_split, Xs_split = self._split(X, Xs)
-        covs = [cov(x, xs, diag) for cov, x, xs
-                in zip(self.factor_list, X_split, Xs_split)]
+        covs = [
+            cov(x, xs, diag) for cov, x, xs in zip(self.factor_list, X_split, Xs_split)
+        ]
         return reduce(mul, covs)
 
 
 class Constant(Covariance):
-    R"""
+    r"""
     Constant valued covariance function.
 
     .. math::
@@ -208,7 +221,7 @@ def full(self, X, Xs=None):
 
 
 class WhiteNoise(Covariance):
-    R"""
+    r"""
     White noise covariance function.
 
     .. math::
@@ -231,7 +244,7 @@ def full(self, X, Xs=None):
 
 
 class Stationary(Covariance):
-    R"""
+    r"""
     Base class for stationary kernels/covariance functions.
 
     Parameters
@@ -256,13 +269,15 @@ def square_dist(self, X, Xs):
         X = tt.mul(X, 1.0 / self.ls)
         X2 = tt.sum(tt.square(X), 1)
         if Xs is None:
-            sqd = (-2.0 * tt.dot(X, tt.transpose(X))
-                   + (tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1))))
+            sqd = -2.0 * tt.dot(X, tt.transpose(X)) + (
+                tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1))
+            )
         else:
             Xs = tt.mul(Xs, 1.0 / self.ls)
             Xs2 = tt.sum(tt.square(Xs), 1)
-            sqd = (-2.0 * tt.dot(X, tt.transpose(Xs))
-                   + (tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))))
+            sqd = -2.0 * tt.dot(X, tt.transpose(Xs)) + (
+                tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))
+            )
         return tt.clip(sqd, 0.0, np.inf)
 
     def euclidean_dist(self, X, Xs):
@@ -277,7 +292,7 @@ def full(self, X, Xs=None):
 
 
 class Periodic(Stationary):
-    R"""
+    r"""
     The Periodic kernel.
 
     .. math::
@@ -287,19 +302,20 @@ class Periodic(Stationary):
     def __init__(self, input_dim, period, ls=None, ls_inv=None, active_dims=None):
         super(Periodic, self).__init__(input_dim, ls, ls_inv, active_dims)
         self.period = period
+
     def full(self, X, Xs=None):
         X, Xs = self._slice(X, Xs)
         if Xs is None:
             Xs = X
-        f1 = X.dimshuffle(0, 'x', 1)
-        f2 = Xs.dimshuffle('x', 0, 1)
+        f1 = X.dimshuffle(0, "x", 1)
+        f2 = Xs.dimshuffle("x", 0, 1)
         r = np.pi * (f1 - f2) / self.period
         r = tt.sum(tt.square(tt.sin(r) / self.ls), 2)
         return tt.exp(-0.5 * r)
 
 
 class ExpQuad(Stationary):
-    R"""
+    r"""
     The Exponentiated Quadratic kernel.  Also refered to as the Squared
     Exponential, or Radial Basis Function kernel.
 
@@ -314,7 +330,7 @@ def full(self, X, Xs=None):
 
 
 class RatQuad(Stationary):
-    R"""
+    r"""
     The Rational Quadratic kernel.
 
     .. math::
@@ -328,12 +344,14 @@ def __init__(self, input_dim, alpha, ls=None, ls_inv=None, active_dims=None):
 
     def full(self, X, Xs=None):
         X, Xs = self._slice(X, Xs)
-        return (tt.power((1.0 + 0.5 * self.square_dist(X, Xs)
-                         * (1.0 / self.alpha)), -1.0 * self.alpha))
+        return tt.power(
+            (1.0 + 0.5 * self.square_dist(X, Xs) * (1.0 / self.alpha)),
+            -1.0 * self.alpha,
+        )
 
 
 class Matern52(Stationary):
-    R"""
+    r"""
     The Matern kernel with nu = 5/2.
 
     .. math::
@@ -346,12 +364,13 @@ class Matern52(Stationary):
     def full(self, X, Xs=None):
         X, Xs = self._slice(X, Xs)
         r = self.euclidean_dist(X, Xs)
-        return ((1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * tt.square(r))
-                * tt.exp(-1.0 * np.sqrt(5.0) * r))
+        return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * tt.square(r)) * tt.exp(
+            -1.0 * np.sqrt(5.0) * r
+        )
 
 
 class Matern32(Stationary):
-    R"""
+    r"""
     The Matern kernel with nu = 3/2.
 
     .. math::
@@ -367,7 +386,7 @@ def full(self, X, Xs=None):
 
 
 class Exponential(Stationary):
-    R"""
+    r"""
     The Exponential kernel.
 
     .. math::
@@ -381,7 +400,7 @@ def full(self, X, Xs=None):
 
 
 class Cosine(Stationary):
-    R"""
+    r"""
     The Cosine kernel.
 
     .. math::
@@ -394,7 +413,7 @@ def full(self, X, Xs=None):
 
 
 class Linear(Covariance):
-    R"""
+    r"""
     The Linear kernel.
 
     .. math::
@@ -424,7 +443,7 @@ def diag(self, X):
 
 
 class Polynomial(Linear):
-    R"""
+    r"""
     The Polynomial kernel.
 
     .. math::
@@ -446,7 +465,7 @@ def diag(self, X):
 
 
 class WarpedInput(Covariance):
-    R"""
+    r"""
     Warp the inputs of any kernel using an arbitrary function
     defined using Theano.
 
@@ -462,8 +481,7 @@ class WarpedInput(Covariance):
         Additional inputs (besides X or Xs) to warp_func.
     """
 
-    def __init__(self, input_dim, cov_func, warp_func, args=None,
-                 active_dims=None):
+    def __init__(self, input_dim, cov_func, warp_func, args=None, active_dims=None):
         super(WarpedInput, self).__init__(input_dim, active_dims)
         if not callable(warp_func):
             raise TypeError("warp_func must be callable")
@@ -486,7 +504,7 @@ def diag(self, X):
 
 
 class Gibbs(Covariance):
-    R"""
+    r"""
     The Gibbs kernel.  Use an arbitrary lengthscale function defined
     using Theano.  Only tested in one dimension.
 
@@ -503,17 +521,18 @@ class Gibbs(Covariance):
         Additional inputs (besides X or Xs) to lengthscale_func.
     """
 
-    def __init__(self, input_dim, lengthscale_func, args=None,
-                 active_dims=None):
+    def __init__(self, input_dim, lengthscale_func, args=None, active_dims=None):
         super(Gibbs, self).__init__(input_dim, active_dims)
         if active_dims is not None:
             if len(active_dims) > 1:
-                raise NotImplementedError(("Higher dimensional inputs ",
-                                           "are untested"))
+                raise NotImplementedError(
+                    ("Higher dimensional inputs ", "are untested")
+                )
         else:
             if input_dim != 1:
-                raise NotImplementedError(("Higher dimensional inputs ",
-                                           "are untested"))
+                raise NotImplementedError(
+                    ("Higher dimensional inputs ", "are untested")
+                )
         if not callable(lengthscale_func):
             raise TypeError("lengthscale_func must be callable")
         self.lfunc = handle_args(lengthscale_func, args)
@@ -522,12 +541,14 @@ def __init__(self, input_dim, lengthscale_func, args=None,
     def square_dist(self, X, Xs=None):
         X2 = tt.sum(tt.square(X), 1)
         if Xs is None:
-            sqd = (-2.0 * tt.dot(X, tt.transpose(X))
-                   + (tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1))))
+            sqd = -2.0 * tt.dot(X, tt.transpose(X)) + (
+                tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1))
+            )
         else:
             Xs2 = tt.sum(tt.square(Xs), 1)
-            sqd = (-2.0 * tt.dot(X, tt.transpose(Xs))
-                   + (tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))))
+            sqd = -2.0 * tt.dot(X, tt.transpose(Xs)) + (
+                tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))
+            )
         return tt.clip(sqd, 0.0, np.inf)
 
     def full(self, X, Xs=None):
@@ -541,15 +562,16 @@ def full(self, X, Xs=None):
             r2 = self.square_dist(X, Xs)
         rx2 = tt.reshape(tt.square(rx), (-1, 1))
         rz2 = tt.reshape(tt.square(rz), (1, -1))
-        return (tt.sqrt((2.0 * tt.outer(rx, rz)) / (rx2 + rz2))
-                * tt.exp(-1.0 * r2 / (rx2 + rz2)))
+        return tt.sqrt((2.0 * tt.outer(rx, rz)) / (rx2 + rz2)) * tt.exp(
+            -1.0 * r2 / (rx2 + rz2)
+        )
 
     def diag(self, X):
         return tt.alloc(1.0, X.shape[0])
 
 
 class ScaledCov(Covariance):
-    R"""
+    r"""
     Construct a kernel by multiplying a base kernel with a scaling
     function defined using Theano.  The scaling function is
     non-negative, and can be parameterized.
@@ -566,6 +588,7 @@ class ScaledCov(Covariance):
     args : optional, tuple or list of scalars or PyMC3 variables
         Additional inputs (besides X or Xs) to lengthscale_func.
     """
+
     def __init__(self, input_dim, cov_func, scaling_func, args=None, active_dims=None):
         super(ScaledCov, self).__init__(input_dim, active_dims)
         if not callable(scaling_func):
@@ -593,7 +616,7 @@ def full(self, X, Xs=None):
 
 
 class Coregion(Covariance):
-    R"""Covariance function for intrinsic/linear coregionalization models.
+    r"""Covariance function for intrinsic/linear coregionalization models.
     Adapted from GPy http://gpy.readthedocs.io/en/deploy/GPy.kern.src.html#GPy.kern.src.coregionalize.Coregionalize.
 
     This covariance has the form:
@@ -628,10 +651,12 @@ class Coregion(Covariance):
     def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None):
         super(Coregion, self).__init__(input_dim, active_dims)
         if len(self.active_dims) != 1:
-            raise ValueError('Coregion requires exactly one dimension to be active')
+            raise ValueError("Coregion requires exactly one dimension to be active")
         make_B = W is not None or kappa is not None
         if make_B and B is not None:
-            raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion')
+            raise ValueError(
+                "Exactly one of (W, kappa) and B must be provided to Coregion"
+            )
         if make_B:
             self.W = tt.as_tensor_variable(W)
             self.kappa = tt.as_tensor_variable(kappa)
@@ -639,20 +664,22 @@ def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None):
         elif B is not None:
             self.B = tt.as_tensor_variable(B)
         else:
-            raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion')
+            raise ValueError(
+                "Exactly one of (W, kappa) and B must be provided to Coregion"
+            )
 
     def full(self, X, Xs=None):
         X, Xs = self._slice(X, Xs)
-        index = tt.cast(X, 'int32')
+        index = tt.cast(X, "int32")
         if Xs is None:
             index2 = index.T
         else:
-            index2 = tt.cast(Xs, 'int32').T
+            index2 = tt.cast(Xs, "int32").T
         return self.B[index, index2]
 
     def diag(self, X):
         X, _ = self._slice(X, None)
-        index = tt.cast(X, 'int32')
+        index = tt.cast(X, "int32")
         return tt.diag(self.B)[index.ravel()]
 
 
@@ -664,6 +691,5 @@ def f(x, args):
             if not isinstance(args, tuple):
                 args = (args,)
             return func(x, *args)
-    return f
-
 
+    return f
diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py
index 522352ba12..ef906c2807 100644
--- a/pymc3/gp/gp.py
+++ b/pymc3/gp/gp.py
@@ -7,18 +7,23 @@
 import pymc3 as pm
 from pymc3.gp.cov import Covariance, Constant
 from pymc3.gp.mean import Zero
-from pymc3.gp.util import (conditioned_vars, infer_shape,
-                           stabilize, cholesky, solve_lower, solve_upper)
+from pymc3.gp.util import (
+    conditioned_vars,
+    infer_shape,
+    stabilize,
+    cholesky,
+    solve_lower,
+    solve_upper,
+)
 from pymc3.distributions import draw_values
 from theano.tensor.nlinalg import eigh
-from ..math import (cartesian, kron_dot, kron_diag,
-                    kron_solve_lower, kron_solve_upper)
+from ..math import cartesian, kron_dot, kron_diag, kron_solve_lower, kron_solve_upper
 
-__all__ = ['Latent', 'Marginal', 'TP', 'MarginalSparse', 'LatentKron', 'MarginalKron']
+__all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"]
 
 
 class Base(object):
-    R"""
+    r"""
     Base class.
     """
 
@@ -49,7 +54,7 @@ def predict(self, Xnew, point=None, given=None, diag=False):
 
 @conditioned_vars(["X", "f"])
 class Latent(Base):
-    R"""
+    r"""
     Latent Gaussian process.
 
     The `gp.Latent` class is a direct implementation of a GP.  No addiive
@@ -118,7 +123,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs):
         return f
 
     def prior(self, name, X, reparameterize=True, **kwargs):
-        R"""
+        r"""
         Returns the GP prior distribution evaluated over the input
         locations `X`.
 
@@ -150,14 +155,14 @@ def prior(self, name, X, reparameterize=True, **kwargs):
     def _get_given_vals(self, given):
         if given is None:
             given = {}
-        if 'gp' in given:
-            cov_total = given['gp'].cov_func
-            mean_total = given['gp'].mean_func
+        if "gp" in given:
+            cov_total = given["gp"].cov_func
+            mean_total = given["gp"].mean_func
         else:
             cov_total = self.cov_func
             mean_total = self.mean_func
-        if all(val in given for val in ['X', 'f']):
-            X, f = given['X'], given['f']
+        if all(val in given for val in ["X", "f"]):
+            X, f = given["X"], given["f"]
         else:
             X, f = self.X, self.f
         return X, f, cov_total, mean_total
@@ -174,7 +179,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total):
         return mu, cov
 
     def conditional(self, name, Xnew, given=None, **kwargs):
-        R"""
+        r"""
         Returns the conditional distribution evaluated over new input
         locations `Xnew`.
 
@@ -240,7 +245,9 @@ class TP(Latent):
 
     def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), nu=None):
         if nu is None:
-            raise ValueError("Student's T process requires a degrees of freedom parameter, 'nu'")
+            raise ValueError(
+                "Student's T process requires a degrees of freedom parameter, 'nu'"
+            )
         self.nu = nu
         super(TP, self).__init__(mean_func, cov_func)
 
@@ -254,13 +261,15 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs):
         if reparameterize:
             chi2 = pm.ChiSquared("chi2_", self.nu)
             v = pm.Normal(name + "_rotated_", mu=0.0, sd=1.0, shape=shape, **kwargs)
-            f = pm.Deterministic(name, (tt.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v)))
+            f = pm.Deterministic(
+                name, (tt.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))
+            )
         else:
             f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, shape=shape, **kwargs)
         return f
 
     def prior(self, name, X, reparameterize=True, **kwargs):
-        R"""
+        r"""
         Returns the TP prior distribution evaluated over the input
         locations `X`.
 
@@ -296,11 +305,11 @@ def _build_conditional(self, Xnew, X, f):
         mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), v)
         beta = tt.dot(v, v)
         nu2 = self.nu + X.shape[0]
-        covT = (self.nu + beta - 2)/(nu2 - 2) * cov
+        covT = (self.nu + beta - 2) / (nu2 - 2) * cov
         return nu2, mu, covT
 
     def conditional(self, name, Xnew, **kwargs):
-        R"""
+        r"""
         Returns the conditional distribution evaluated over new input
         locations `Xnew`.
 
@@ -328,7 +337,7 @@ def conditional(self, name, Xnew, **kwargs):
 
 @conditioned_vars(["X", "y", "noise"])
 class Marginal(Base):
-    R"""
+    r"""
     Marginal Gaussian process.
 
     The `gp.Marginal` class is an implementation of the sum of a GP
@@ -383,7 +392,7 @@ def _build_marginal_likelihood(self, X, noise):
         return mu, cov
 
     def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
-        R"""
+        r"""
         Returns the marginal likelihood distribution, given the input
         locations `X` and the data `y`.
 
@@ -430,22 +439,23 @@ def _get_given_vals(self, given):
         if given is None:
             given = {}
 
-        if 'gp' in given:
-            cov_total = given['gp'].cov_func
-            mean_total = given['gp'].mean_func
+        if "gp" in given:
+            cov_total = given["gp"].cov_func
+            mean_total = given["gp"].mean_func
         else:
             cov_total = self.cov_func
             mean_total = self.mean_func
-        if all(val in given for val in ['X', 'y', 'noise']):
-            X, y, noise = given['X'], given['y'], given['noise']
+        if all(val in given for val in ["X", "y", "noise"]):
+            X, y, noise = given["X"], given["y"], given["noise"]
             if not isinstance(noise, Covariance):
                 noise = pm.gp.cov.WhiteNoise(noise)
         else:
             X, y, noise = self.X, self.y, self.noise
         return X, y, noise, cov_total, mean_total
 
-    def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise,
-                           cov_total, mean_total):
+    def _build_conditional(
+        self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total
+    ):
         Kxx = cov_total(X)
         Kxs = self.cov_func(X, Xnew)
         Knx = noise(X)
@@ -468,7 +478,7 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise,
             return mu, cov if pred_noise else stabilize(cov)
 
     def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
-        R"""
+        r"""
         Returns the conditional distribution evaluated over new input
         locations `Xnew`.
 
@@ -506,7 +516,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
         return pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs)
 
     def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None):
-        R"""
+        r"""
         Return the mean vector and covariance matrix of the conditional
         distribution as numpy arrays, given a `point`, such as the MAP
         estimate or a sample from a `trace`.
@@ -534,7 +544,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None):
         return draw_values([mu, cov], point=point)
 
     def predictt(self, Xnew, diag=False, pred_noise=False, given=None):
-        R"""
+        r"""
         Return the mean vector and covariance matrix of the conditional
         distribution as symbolic variables.
 
@@ -559,7 +569,7 @@ def predictt(self, Xnew, diag=False, pred_noise=False, given=None):
 
 @conditioned_vars(["X", "Xu", "y", "sigma"])
 class MarginalSparse(Marginal):
-    R"""
+    r"""
     Approximate marginal Gaussian process.
 
     The `gp.MarginalSparse` class is an implementation of the sum of a GP
@@ -653,9 +663,9 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
             trace = 0.0
         elif self.approx == "VFE":
             Lamd = tt.ones_like(Qffd) * sigma2
-            trace = ((1.0 / (2.0 * sigma2)) *
-                     (tt.sum(self.cov_func(X, diag=True)) -
-                      tt.sum(tt.sum(A * A, 0))))
+            trace = (1.0 / (2.0 * sigma2)) * (
+                tt.sum(self.cov_func(X, diag=True)) - tt.sum(tt.sum(A * A, 0))
+            )
         else:  # DTC
             Lamd = tt.ones_like(Qffd) * sigma2
             trace = 0.0
@@ -669,8 +679,10 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
         quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
         return -1.0 * (constant + logdet + quadratic + trace)
 
-    def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kwargs):
-        R"""
+    def marginal_likelihood(
+        self, name, X, Xu, y, noise=None, is_observed=True, **kwargs
+    ):
+        r"""
         Returns the approximate marginal likelihood distribution, given the input
         locations `X`, inducing point locations `Xu`, data `y`, and white noise
         standard deviations `sigma`.
@@ -701,25 +713,29 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw
         self.Xu = Xu
         self.y = y
         if noise is None:
-            sigma = kwargs.get('sigma')
+            sigma = kwargs.pop("sigma", None)
             if sigma is None:
-                raise ValueError('noise argument must be specified')
+                raise ValueError("noise argument must be specified")
             else:
                 self.sigma = sigma
                 warnings.warn(
                     "The 'sigma' argument has been deprecated. Use 'noise' instead.",
-                DeprecationWarning)
+                    DeprecationWarning,
+                )
         else:
             self.sigma = noise
-        logp = functools.partial(self._build_marginal_likelihood_logp,
-                                 X=X, Xu=Xu, sigma=noise)
+        logp = functools.partial(
+            self._build_marginal_likelihood_logp, X=X, Xu=Xu, sigma=self.sigma
+        )
         if is_observed:
             return pm.DensityDist(name, logp, observed=y, **kwargs)
         else:
             shape = infer_shape(X, kwargs.pop("shape", None))
             return pm.DensityDist(name, logp, shape=shape, **kwargs)
 
-    def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
+    def _build_conditional(
+        self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total
+    ):
         sigma2 = tt.square(sigma)
         Kuu = cov_total(Xu)
         Kuf = cov_total(Xu, X)
@@ -738,7 +754,9 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total,
         c = solve_lower(L_B, tt.dot(A, r_l))
         Kus = self.cov_func(Xu, Xnew)
         As = solve_lower(Luu, Kus)
-        mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c))
+        mu = self.mean_func(Xnew) + tt.dot(
+            tt.transpose(As), solve_upper(tt.transpose(L_B), c)
+        )
         C = solve_lower(L_B, As)
         if diag:
             Kss = self.cov_func(Xnew, diag=True)
@@ -747,8 +765,11 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total,
                 var += sigma2
             return mu, var
         else:
-            cov = (self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) +
-                   tt.dot(tt.transpose(C), C))
+            cov = (
+                self.cov_func(Xnew)
+                - tt.dot(tt.transpose(As), As)
+                + tt.dot(tt.transpose(C), C)
+            )
             if pred_noise:
                 cov += sigma2 * tt.identity_like(cov)
             return mu, cov if pred_noise else stabilize(cov)
@@ -756,20 +777,20 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total,
     def _get_given_vals(self, given):
         if given is None:
             given = {}
-        if 'gp' in given:
-            cov_total = given['gp'].cov_func
-            mean_total = given['gp'].mean_func
+        if "gp" in given:
+            cov_total = given["gp"].cov_func
+            mean_total = given["gp"].mean_func
         else:
             cov_total = self.cov_func
             mean_total = self.mean_func
-        if all(val in given for val in ['X', 'Xu', 'y', 'sigma']):
-            X, Xu, y, sigma = given['X'], given['Xu'], given['y'], given['sigma']
+        if all(val in given for val in ["X", "Xu", "y", "sigma"]):
+            X, Xu, y, sigma = given["X"], given["Xu"], given["y"], given["sigma"]
         else:
             X, Xu, y, sigma = self.X, self.Xu, self.y, self.sigma
         return X, Xu, y, sigma, cov_total, mean_total
 
     def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
-        R"""
+        r"""
         Returns the approximate conditional distribution of the GP evaluated over
         new input locations `Xnew`.
 
@@ -800,7 +821,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
 
 @conditioned_vars(["Xs", "f"])
 class LatentKron(Base):
-    R"""
+    r"""
     Latent Gaussian process whose covariance is a tensor product kernel.
 
     The `gp.LatentKron` class is a direct implementation of a GP with a
@@ -861,7 +882,7 @@ def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))):
         super(LatentKron, self).__init__(mean_func, cov_func)
 
     def __add__(self, other):
-        raise TypeError('Additive, Kronecker-structured processes not implemented')
+        raise TypeError("Additive, Kronecker-structured processes not implemented")
 
     def _build_prior(self, name, Xs, **kwargs):
         self.N = np.prod([len(X) for X in Xs])
@@ -891,7 +912,7 @@ def prior(self, name, Xs, **kwargs):
             distribution constructor.
         """
         if len(Xs) != len(self.cov_funcs):
-            raise ValueError('Must provide a covariance function for each X')
+            raise ValueError("Must provide a covariance function for each X")
         f = self._build_prior(name, Xs, **kwargs)
         self.Xs = Xs
         self.f = f
@@ -952,7 +973,7 @@ def conditional(self, name, Xnew, **kwargs):
 
 @conditioned_vars(["Xs", "y", "sigma"])
 class MarginalKron(Base):
-    R"""
+    r"""
     Marginal Gaussian process whose covariance is a tensor product kernel.
 
     The `gp.MarginalKron` class is an implementation of the sum of a
@@ -1017,7 +1038,7 @@ def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))):
         super(MarginalKron, self).__init__(mean_func, cov_func)
 
     def __add__(self, other):
-        raise TypeError('Additive, Kronecker-structured processes not implemented')
+        raise TypeError("Additive, Kronecker-structured processes not implemented")
 
     def _build_marginal_likelihood(self, Xs):
         self.X = cartesian(*Xs)
@@ -1028,10 +1049,14 @@ def _build_marginal_likelihood(self, Xs):
     def _check_inputs(self, Xs, y):
         N = np.prod([len(X) for X in Xs])
         if len(Xs) != len(self.cov_funcs):
-            raise ValueError('Must provide a covariance function for each X')
+            raise ValueError("Must provide a covariance function for each X")
         if N != len(y):
-            raise ValueError(('Length of y ({}) must match length of cartesian'
-                              'cartesian product of Xs ({})').format(len(y), N))
+            raise ValueError(
+                (
+                    "Length of y ({}) must match length of "
+                    "cartesian product of Xs ({})"
+                ).format(len(y), N)
+            )
 
     def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs):
         """
@@ -1065,12 +1090,14 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs):
         self.y = y
         self.sigma = sigma
         if is_observed:
-            return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma,
-                                      observed=y, **kwargs)
+            return pm.KroneckerNormal(
+                name, mu=mu, covs=covs, sigma=sigma, observed=y, **kwargs
+            )
         else:
             shape = np.prod([len(X) for X in Xs])
-            return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma,
-                                      shape=shape, **kwargs)
+            return pm.KroneckerNormal(
+                name, mu=mu, covs=covs, sigma=sigma, shape=shape, **kwargs
+            )
 
     def _build_conditional(self, Xnew, pred_noise, diag):
         Xs, y, sigma = self.Xs, self.y, self.sigma
@@ -1083,7 +1110,7 @@ def _build_conditional(self, Xnew, pred_noise, diag):
         QTs = list(map(tt.transpose, Qs))
         eigs = kron_diag(*eigs_sep)  # Combine separate eigs
         if sigma is not None:
-            eigs += sigma**2
+            eigs += sigma ** 2
 
         # New points
         Km = self.cov_func(Xnew, diag=diag)
@@ -1092,13 +1119,13 @@ def _build_conditional(self, Xnew, pred_noise, diag):
 
         # Build conditional mu
         alpha = kron_dot(QTs, delta)
-        alpha = alpha/eigs[:, None]
+        alpha = alpha / eigs[:, None]
         alpha = kron_dot(Qs, alpha)
         mu = tt.dot(Kmn, alpha).ravel() + self.mean_func(Xnew)
 
         # Build conditional cov
         A = kron_dot(QTs, Knm)
-        A = A/tt.sqrt(eigs[:, None])
+        A = A / tt.sqrt(eigs[:, None])
         if diag:
             Asq = tt.sum(tt.square(A), 0)
             cov = Km - Asq
@@ -1150,7 +1177,7 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs):
         return pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs)
 
     def predict(self, Xnew, point=None, diag=False, pred_noise=False):
-        R"""
+        r"""
         Return the mean vector and covariance matrix of the conditional
         distribution as numpy arrays, given a `point`, such as the MAP
         estimate or a sample from a `trace`.
@@ -1173,7 +1200,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False):
         return draw_values([mu, cov], point=point)
 
     def predictt(self, Xnew, diag=False, pred_noise=False):
-        R"""
+        r"""
         Return the mean vector and covariance matrix of the conditional
         distribution as symbolic variables.
 
diff --git a/pymc3/gp/mean.py b/pymc3/gp/mean.py
index 638cc6d429..5d9ce1b206 100644
--- a/pymc3/gp/mean.py
+++ b/pymc3/gp/mean.py
@@ -1,15 +1,15 @@
 import theano.tensor as tt
 
-__all__ = ['Zero', 'Constant', 'Linear']
+__all__ = ["Zero", "Constant", "Linear"]
 
 
 class Mean(object):
-    R"""
+    r"""
     Base class for mean functions
     """
 
     def __call__(self, X):
-        R"""
+        r"""
         Evaluate the mean function.
 
         Parameters
@@ -26,7 +26,7 @@ def __mul__(self, other):
 
 
 class Zero(Mean):
-    R"""
+    r"""
     Zero mean function for Gaussian process.
 
     """
@@ -34,8 +34,9 @@ class Zero(Mean):
     def __call__(self, X):
         return tt.alloc(0.0, X.shape[0])
 
+
 class Constant(Mean):
-    R"""
+    r"""
     Constant mean function for Gaussian process.
 
     Parameters
@@ -53,7 +54,7 @@ def __call__(self, X):
 
 
 class Linear(Mean):
-    R"""
+    r"""
     Linear mean function for Gaussian process.
 
     Parameters
@@ -91,4 +92,3 @@ def __init__(self, first_mean, second_mean):
 
     def __call__(self, X):
         return tt.mul(self.m1(X), self.m2(X))
-
diff --git a/pymc3/gp/util.py b/pymc3/gp/util.py
index 92fe23f583..7135b4fca2 100644
--- a/pymc3/gp/util.py
+++ b/pymc3/gp/util.py
@@ -3,9 +3,9 @@
 import theano.tensor as tt
 
 cholesky = tt.slinalg.cholesky
-solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
-solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
-solve = tt.slinalg.Solve(A_structure='general')
+solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
+solve_upper = tt.slinalg.Solve(A_structure="upper_triangular")
+solve = tt.slinalg.Solve(A_structure="general")
 
 
 def infer_shape(X, n_points=None):
@@ -29,10 +29,14 @@ def kmeans_inducing_points(n_inducing, X):
     elif isinstance(X, (np.ndarray, tuple, list)):
         X = np.asarray(X)
     else:
-        raise TypeError(("To use K-means initialization, "
-                         "please provide X as a type that "
-                         "can be cast to np.ndarray, instead "
-                         "of {}".format(type(X))))
+        raise TypeError(
+            (
+                "To use K-means initialization, "
+                "please provide X as a type that "
+                "can be cast to np.ndarray, instead "
+                "of {}".format(type(X))
+            )
+        )
     scaling = np.std(X, 0)
     # if std of a column is very small (zero), don't normalize that column
     scaling[scaling <= 1e-6] = 1.0
@@ -43,33 +47,51 @@ def kmeans_inducing_points(n_inducing, X):
 
 def conditioned_vars(varnames):
     """ Decorator for validating attrs that are conditioned on. """
+
     def gp_wrapper(cls):
         def make_getter(name):
             def getter(self):
                 value = getattr(self, name, None)
                 if value is None:
-                    raise AttributeError(("'{}' not set.  Provide as argument "
-                                          "to condition, or call 'prior' "
-                                          "first".format(name.lstrip("_"))))
+                    raise AttributeError(
+                        (
+                            "'{}' not set.  Provide as argument "
+                            "to condition, or call 'prior' "
+                            "first".format(name.lstrip("_"))
+                        )
+                    )
                 else:
                     return value
                 return getattr(self, name)
+
             return getter
 
         def make_setter(name):
             def setter(self, val):
                 setattr(self, name, val)
+
             return setter
 
         for name in varnames:
-            getter = make_getter('_' + name)
-            setter = make_setter('_' + name)
+            getter = make_getter("_" + name)
+            setter = make_setter("_" + name)
             setattr(cls, name, property(getter, setter))
         return cls
+
     return gp_wrapper
 
 
-def plot_gp_dist(ax, samples, x, plot_samples=True, palette="Reds", fill_alpha=0.8, samples_alpha=0.1, fill_kwargs=None, samples_kwargs=None):
+def plot_gp_dist(
+    ax,
+    samples,
+    x,
+    plot_samples=True,
+    palette="Reds",
+    fill_alpha=0.8,
+    samples_alpha=0.1,
+    fill_kwargs=None,
+    samples_kwargs=None,
+):
     """ A helper function for plotting 1D GP posteriors from trace 
     
         Parameters
@@ -112,13 +134,21 @@ def plot_gp_dist(ax, samples, x, plot_samples=True, palette="Reds", fill_alpha=0
     x = x.flatten()
     for i, p in enumerate(percs[::-1]):
         upper = np.percentile(samples, p, axis=1)
-        lower = np.percentile(samples, 100-p, axis=1)
+        lower = np.percentile(samples, 100 - p, axis=1)
         color_val = colors[i]
-        ax.fill_between(x, upper, lower, color=cmap(color_val), alpha=fill_alpha, **fill_kwargs)
+        ax.fill_between(
+            x, upper, lower, color=cmap(color_val), alpha=fill_alpha, **fill_kwargs
+        )
     if plot_samples:
         # plot a few samples
         idx = np.random.randint(0, samples.shape[1], 30)
-        ax.plot(x, samples[:,idx], color=cmap(0.9), lw=1, alpha=samples_alpha,
-                **samples_kwargs)
+        ax.plot(
+            x,
+            samples[:, idx],
+            color=cmap(0.9),
+            lw=1,
+            alpha=samples_alpha,
+            **samples_kwargs
+        )
 
     return ax
diff --git a/pymc3/math.py b/pymc3/math.py
index 25a057f724..6edfbe05fe 100644
--- a/pymc3/math.py
+++ b/pymc3/math.py
@@ -1,13 +1,51 @@
 from __future__ import division
 import sys
 import theano.tensor as tt
+
 # pylint: disable=unused-import
 import theano
 from theano.tensor import (
-    constant, flatten, zeros_like, ones_like, stack, concatenate, sum, prod,
-    lt, gt, le, ge, eq, neq, switch, clip, where, and_, or_, abs_, exp, log,
-    cos, sin, tan, cosh, sinh, tanh, sqr, sqrt, erf, erfc, erfinv, erfcinv, dot,
-    maximum, minimum, sgn, ceil, floor)
+    constant,
+    flatten,
+    zeros_like,
+    ones_like,
+    stack,
+    concatenate,
+    sum,
+    prod,
+    lt,
+    gt,
+    le,
+    ge,
+    eq,
+    neq,
+    switch,
+    clip,
+    where,
+    and_,
+    or_,
+    abs_,
+    exp,
+    log,
+    cos,
+    sin,
+    tan,
+    cosh,
+    sinh,
+    tanh,
+    sqr,
+    sqrt,
+    erf,
+    erfc,
+    erfinv,
+    erfcinv,
+    dot,
+    maximum,
+    minimum,
+    sgn,
+    ceil,
+    floor,
+)
 from theano.tensor.nlinalg import det, matrix_inverse, extract_diag, matrix_dot, trace
 import theano.tensor.slinalg
 import theano.sparse
@@ -43,7 +81,7 @@ def cartesian(*arrays):
             1D arrays where earlier arrays loop more slowly than later ones
     """
     N = len(arrays)
-    return np.stack(np.meshgrid(*arrays, indexing='ij'), -1).reshape(-1, N)
+    return np.stack(np.meshgrid(*arrays, indexing="ij"), -1).reshape(-1, N)
 
 
 def kron_matrix_op(krons, m, op):
@@ -58,6 +96,7 @@ def kron_matrix_op(krons, m, op):
     m    : NxM array or 1D array (treated as Nx1)
            Object that krons act upon
     """
+
     def flat_matrix_op(flat_mat, mat):
         Nmat = mat.shape[1]
         flat_shape = flat_mat.shape
@@ -70,7 +109,7 @@ def kron_vector_op(v):
     if m.ndim == 1:
         m = m[:, None]  # Treat 1D array as Nx1 matrix
     if m.ndim != 2:  # Has not been tested otherwise
-        raise ValueError('m must have ndim <= 2, not {}'.format(mat.ndim))
+        raise ValueError("m must have ndim <= 2, not {}".format(m.ndim))
     res = kron_vector_op(m)
     res_shape = res.shape
     return tt.reshape(res, (res_shape[1], res_shape[0])).T
@@ -81,6 +120,7 @@ def kron_vector_op(v):
 kron_solve_lower = partial(kron_matrix_op, op=tt.slinalg.solve_lower_triangular)
 kron_solve_upper = partial(kron_matrix_op, op=tt.slinalg.solve_upper_triangular)
 
+
 def flat_outer(a, b):
     return tt.outer(a, b).ravel()
 
@@ -101,7 +141,7 @@ def tround(*args, **kwargs):
     Temporary function to silence round warning in Theano. Please remove
     when the warning disappears.
     """
-    kwargs['mode'] = 'half_to_even'
+    kwargs["mode"] = "half_to_even"
     return tt.round(*args, **kwargs)
 
 
@@ -113,9 +153,7 @@ def logsumexp(x, axis=None):
 
 def logaddexp(a, b):
     diff = b - a
-    return tt.switch(diff > 0,
-                     b + tt.log1p(tt.exp(-diff)),
-                     a + tt.log1p(tt.exp(diff)))
+    return tt.switch(diff > 0, b + tt.log1p(tt.exp(-diff)), a + tt.log1p(tt.exp(diff)))
 
 
 def logdiffexp(a, b):
@@ -125,7 +163,7 @@ def logdiffexp(a, b):
 
 def invlogit(x, eps=sys.float_info.epsilon):
     """The inverse of the logit function, 1 / (1 + exp(-x))."""
-    return (1. - 2. * eps) / (1. + tt.exp(-x)) + eps
+    return (1.0 - 2.0 * eps) / (1.0 + tt.exp(-x)) + eps
 
 
 def logit(p):
@@ -148,10 +186,7 @@ def log1mexp(x):
     For details, see
     https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf
     """
-    return tt.switch(
-        tt.lt(x, 0.683),
-        tt.log(-tt.expm1(-x)),
-        tt.log1p(-tt.exp(-x)))
+    return tt.switch(tt.lt(x, 0.683), tt.log(-tt.expm1(-x)), tt.log1p(-tt.exp(-x)))
 
 
 def flatten_list(tensors):
@@ -168,6 +203,7 @@ class LogDet(Op):
     Once PR #3959 (https://github.com/Theano/Theano/pull/3959/) by harpone is merged,
     this must be removed.
     """
+
     def make_node(self, x):
         x = theano.tensor.as_tensor_variable(x)
         o = theano.tensor.scalar(dtype=x.dtype)
@@ -181,7 +217,7 @@ def perform(self, node, inputs, outputs, params=None):
             log_det = np.sum(np.log(np.abs(s)))
             z[0] = np.asarray(log_det, dtype=x.dtype)
         except Exception:
-            print('Failed to compute logdet of {}.'.format(x))
+            print("Failed to compute logdet of {}.".format(x))
             raise
 
     def grad(self, inputs, g_outputs):
@@ -192,19 +228,20 @@ def grad(self, inputs, g_outputs):
     def __str__(self):
         return "LogDet"
 
+
 logdet = LogDet()
 
 
 def probit(p):
-    return -sqrt(2.) * erfcinv(2. * p)
+    return -sqrt(2.0) * erfcinv(2.0 * p)
 
 
 def invprobit(x):
-    return .5 * erfc(-x / sqrt(2.))
+    return 0.5 * erfc(-x / sqrt(2.0))
 
 
 def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
-    R"""Convert a packed triangular matrix into a two dimensional array.
+    r"""Convert a packed triangular matrix into a two dimensional array.
 
     Triangular matrices can be stored with better space efficiancy by
     storing the non-zero values in a one-dimensional array. We number
@@ -227,9 +264,9 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
         If true, return only the diagonal of the matrix.
     """
     if packed.ndim != 1:
-        raise ValueError('Packed triagular is not one dimensional.')
+        raise ValueError("Packed triangular is not one dimensional.")
     if not isinstance(n, int):
-        raise TypeError('n must be an integer')
+        raise TypeError("n must be an integer")
 
     if diagonal_only and lower:
         diag_idxs = np.arange(1, n + 1).cumsum() - 1
@@ -251,12 +288,13 @@ class BatchedDiag(tt.Op):
     """
     Fast BatchedDiag allocation
     """
+
     __props__ = ()
 
     def make_node(self, diag):
         diag = tt.as_tensor_variable(diag)
         if diag.type.ndim != 2:
-            raise TypeError('data argument must be a matrix', diag.type)
+            raise TypeError("data argument must be a matrix", diag.type)
 
         return tt.Apply(self, [diag], [tt.tensor3(dtype=diag.dtype)])
 
@@ -278,7 +316,7 @@ def grad(self, inputs, gout):
         return [gz[..., idx, idx]]
 
     def infer_shape(self, nodes, shapes):
-        return [(shapes[0][0], ) + (shapes[0][1],) * 2]
+        return [(shapes[0][0],) + (shapes[0][1],) * 2]
 
 
 def batched_diag(C):
@@ -292,26 +330,30 @@ def batched_diag(C):
         idx = tt.arange(dim)
         return C[..., idx, idx]
     else:
-        raise ValueError('Input should be 2 or 3 dimensional')
+        raise ValueError("Input should be 2 or 3 dimensional")
 
 
 class BlockDiagonalMatrix(Op):
-    __props__ = ('sparse', 'format')
+    __props__ = ("sparse", "format")
 
-    def __init__(self, sparse=False, format='csr'):
-        if format not in ('csr', 'csc'):
-            raise ValueError("format must be one of: 'csr', 'csc', got {}".format(format))
+    def __init__(self, sparse=False, format="csr"):
+        if format not in ("csr", "csc"):
+            raise ValueError(
+                "format must be one of: 'csr', 'csc', got {}".format(format)
+            )
         self.sparse = sparse
         self.format = format
 
     def make_node(self, *matrices):
         if not matrices:
-            raise ValueError('no matrices to allocate')
+            raise ValueError("no matrices to allocate")
         matrices = list(map(tt.as_tensor, matrices))
         if any(mat.type.ndim != 2 for mat in matrices):
-            raise TypeError('all data arguments must be matrices')
+            raise TypeError("all data arguments must be matrices")
         if self.sparse:
-            out_type = theano.sparse.matrix(self.format, dtype=largest_common_dtype(matrices))
+            out_type = theano.sparse.matrix(
+                self.format, dtype=largest_common_dtype(matrices)
+            )
         else:
             out_type = theano.tensor.matrix(dtype=largest_common_dtype(matrices))
         return tt.Apply(self, matrices, [out_type])
@@ -319,9 +361,7 @@ def make_node(self, *matrices):
     def perform(self, node, inputs, output_storage, params=None):
         dtype = largest_common_dtype(inputs)
         if self.sparse:
-            output_storage[0][0] = sp.sparse.block_diag(
-                inputs, self.format, dtype
-            )
+            output_storage[0][0] = sp.sparse.block_diag(inputs, self.format, dtype)
         else:
             output_storage[0][0] = scipy_block_diag(*inputs).astype(dtype)
 
@@ -329,9 +369,13 @@ def grad(self, inputs, gout):
         shapes = tt.stack([i.shape for i in inputs])
         index_end = shapes.cumsum(0)
         index_begin = index_end - shapes
-        slices = [ix_(tt.arange(index_begin[i, 0], index_end[i, 0]),
-                      tt.arange(index_begin[i, 1], index_end[i, 1])
-                      ) for i in range(len(inputs))]
+        slices = [
+            ix_(
+                tt.arange(index_begin[i, 0], index_end[i, 0]),
+                tt.arange(index_begin[i, 1], index_end[i, 1]),
+            )
+            for i in range(len(inputs))
+        ]
         return [gout[0][slc] for slc in slices]
 
     def infer_shape(self, nodes, shapes):
@@ -339,7 +383,7 @@ def infer_shape(self, nodes, shapes):
         return [(tt.add(*first), tt.add(*second))]
 
 
-def block_diagonal(matrices, sparse=False, format='csr'):
+def block_diagonal(matrices, sparse=False, format="csr"):
     r"""See scipy.sparse.block_diag or
     scipy.linalg.block_diag for reference
 
diff --git a/pymc3/memoize.py b/pymc3/memoize.py
index 48cf73be4f..c5541e4cec 100644
--- a/pymc3/memoize.py
+++ b/pymc3/memoize.py
@@ -2,6 +2,7 @@
 import pickle
 import collections
 from .util import biwrap
+
 CACHE_REGISTRY = []
 
 
@@ -23,14 +24,15 @@ def memoizer(*args, **kwargs):
         else:
             # bound methods have self as first argument, remove it to compute key
             key = (hashable(args[1:]), hashable(kwargs))
-            if not hasattr(args[0], '_cache'):
-                setattr(args[0], '_cache', collections.defaultdict(dict))
+            if not hasattr(args[0], "_cache"):
+                setattr(args[0], "_cache", collections.defaultdict(dict))
                 # do not add to cache regestry
-            cache = getattr(args[0], '_cache')[obj.__name__]
+            cache = getattr(args[0], "_cache")[obj.__name__]
         if key not in cache:
             cache[key] = obj(*args, **kwargs)
 
         return cache[key]
+
     return memoizer
 
 
@@ -40,7 +42,7 @@ def clear_cache(obj=None):
             c.clear()
     else:
         if isinstance(obj, WithMemoization):
-            for v in getattr(obj, '_cache', {}).values():
+            for v in getattr(obj, "_cache", {}).values():
                 v.clear()
         else:
             obj.cache.clear()
@@ -52,7 +54,7 @@ def __hash__(self):
 
     def __getstate__(self):
         state = self.__dict__.copy()
-        state.pop('_cache', None)
+        state.pop("_cache", None)
         return state
 
     def __setstate__(self, state):
@@ -73,7 +75,7 @@ def hashable(a):
     try:
         return hash(pickle.dumps(a))
     except Exception:
-        if hasattr(a, '__dict__'):
+        if hasattr(a, "__dict__"):
             return hashable(a.__dict__)
         else:
             return id(a)
diff --git a/pymc3/model.py b/pymc3/model.py
index 06284e8ee8..f7fdd9be13 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -21,11 +21,18 @@
 from .util import get_transformed_name
 
 __all__ = [
-    'Model', 'Factor', 'compilef', 'fn', 'fastfn', 'modelcontext',
-    'Point', 'Deterministic', 'Potential'
+    "Model",
+    "Factor",
+    "compilef",
+    "fn",
+    "fastfn",
+    "modelcontext",
+    "Point",
+    "Deterministic",
+    "Potential",
 ]
 
-FlatView = collections.namedtuple('FlatView', 'input, replacements, view')
+FlatView = collections.namedtuple("FlatView", "input, replacements, view")
 
 
 class InstanceMethod(object):
@@ -42,8 +49,9 @@ def __call__(self, *args, **kwargs):
         return getattr(self.obj, self.method_name)(*args, **kwargs)
 
 
-def incorporate_methods(source, destination, methods, default=None,
-                        wrapper=None, override=False):
+def incorporate_methods(
+    source, destination, methods, default=None, wrapper=None, override=False
+):
     """
     Add attributes to a destination object which points to
     methods from from a source object.
@@ -68,9 +76,11 @@ def incorporate_methods(source, destination, methods, default=None,
     """
     for method in methods:
         if hasattr(destination, method) and not override:
-            raise AttributeError("Cannot add method {!r}".format(method) +
-                                 "to destination object as it already exists. "
-                                 "To prevent this error set 'override=True'.")
+            raise AttributeError(
+                "Cannot add method {!r}".format(method)
+                + "to destination object as it already exists. "
+                "To prevent this error set 'override=True'."
+            )
         if hasattr(source, method):
             if wrapper is None:
                 setattr(destination, method, getattr(source, method))
@@ -79,6 +89,7 @@ def incorporate_methods(source, destination, methods, default=None,
         else:
             setattr(destination, method, None)
 
+
 def get_named_nodes_and_relations(graph):
     """Get the named nodes in a theano graph (i.e., nodes whose name
     attribute is not None) along with their relationships (i.e., the
@@ -110,9 +121,11 @@ def get_named_nodes_and_relations(graph):
         node_children = {}
     return _get_named_nodes_and_relations(graph, None, {}, node_parents, node_children)
 
-def _get_named_nodes_and_relations(graph, parent, leaf_nodes,
-                                        node_parents, node_children):
-    if getattr(graph, 'owner', None) is None:  # Leaf node
+
+def _get_named_nodes_and_relations(
+    graph, parent, leaf_nodes, node_parents, node_children
+):
+    if getattr(graph, "owner", None) is None:  # Leaf node
         if graph.name is not None:  # Named leaf node
             leaf_nodes.update({graph.name: graph})
             if parent is not None:  # Is None for the root node
@@ -137,9 +150,9 @@ def _get_named_nodes_and_relations(graph, parent, leaf_nodes,
             # Init the nodes children to an empty set
             node_children[graph] = set()
         for i in graph.owner.inputs:
-            temp_nodes, temp_inter, temp_tree = \
-                _get_named_nodes_and_relations(i, parent, leaf_nodes,
-                                               node_parents, node_children)
+            temp_nodes, temp_inter, temp_tree = _get_named_nodes_and_relations(
+                i, parent, leaf_nodes, node_parents, node_children
+            )
             leaf_nodes.update(temp_nodes)
             node_parents.update(temp_inter)
             node_children.update(temp_tree)
@@ -150,26 +163,27 @@ class Context(object):
     """Functionality for objects that put themselves in a context using
     the `with` statement.
     """
+
     contexts = threading.local()
 
     def __enter__(self):
         type(self).get_contexts().append(self)
         # self._theano_config is set in Model.__new__
-        if hasattr(self, '_theano_config'):
+        if hasattr(self, "_theano_config"):
             self._old_theano_config = set_theano_conf(self._theano_config)
         return self
 
     def __exit__(self, typ, value, traceback):
         type(self).get_contexts().pop()
         # self._theano_config is set in Model.__new__
-        if hasattr(self, '_old_theano_config'):
+        if hasattr(self, "_old_theano_config"):
             set_theano_conf(self._old_theano_config)
 
     @classmethod
     def get_contexts(cls):
         # no race-condition here, cls.contexts is a thread-local object
         # be sure not to override contexts in a subclass however!
-        if not hasattr(cls.contexts, 'stack'):
+        if not hasattr(cls.contexts, "stack"):
             cls.contexts.stack = []
         return cls.contexts.stack
 
@@ -195,6 +209,7 @@ class Factor(object):
     """Common functionality for objects with a log probability density
     associated with them.
     """
+
     def __init__(self, *args, **kwargs):
         super(Factor, self).__init__(*args, **kwargs)
 
@@ -255,28 +270,29 @@ def fastd2logp_nojac(self, vars=None):
     @property
     def logpt(self):
         """Theano scalar of log-probability of the model"""
-        if getattr(self, 'total_size', None) is not None:
+        if getattr(self, "total_size", None) is not None:
             logp = self.logp_sum_unscaledt * self.scaling
         else:
             logp = self.logp_sum_unscaledt
         if self.name is not None:
-            logp.name = '__logp_%s' % self.name
+            logp.name = "__logp_%s" % self.name
         return logp
 
     @property
     def logp_nojact(self):
         """Theano scalar of log-probability, excluding jacobian terms."""
-        if getattr(self, 'total_size', None) is not None:
+        if getattr(self, "total_size", None) is not None:
             logp = tt.sum(self.logp_nojac_unscaledt) * self.scaling
         else:
             logp = tt.sum(self.logp_nojac_unscaledt)
         if self.name is not None:
-            logp.name = '__logp_%s' % self.name
+            logp.name = "__logp_%s" % self.name
         return logp
 
 
 class InitContextMeta(type):
     """Metaclass that executes `__init__` of instance in it's context"""
+
     def __call__(cls, *args, **kwargs):
         instance = cls.__new__(cls, *args, **kwargs)
         with instance:  # appends context
@@ -286,11 +302,13 @@ def __call__(cls, *args, **kwargs):
 
 def withparent(meth):
     """Helper wrapper that passes calls to parent's instance"""
+
     def wrapped(self, *args, **kwargs):
         res = meth(self, *args, **kwargs)
-        if getattr(self, 'parent', None) is not None:
+        if getattr(self, "parent", None) is not None:
             getattr(self.parent, meth.__name__)(*args, **kwargs)
         return res
+
     # Unfortunately functools wrapper fails
     # when decorating built-in methods so we
     # need to fix that improper behaviour
@@ -303,12 +321,14 @@ class treelist(list):
     to parent list instance.
     Extending treelist you will also extend its parent
     """
+
     def __init__(self, iterable=(), parent=None):
         super(treelist, self).__init__(iterable)
         assert isinstance(parent, list) or parent is None
         self.parent = parent
         if self.parent is not None:
             self.parent.extend(self)
+
     # typechecking here works bad
     append = withparent(list.append)
     __iadd__ = withparent(list.__iadd__)
@@ -316,18 +336,18 @@ def __init__(self, iterable=(), parent=None):
 
     def tree_contains(self, item):
         if isinstance(self.parent, treedict):
-            return (list.__contains__(self, item) or
-                    self.parent.tree_contains(item))
+            return list.__contains__(self, item) or self.parent.tree_contains(item)
         elif isinstance(self.parent, list):
-            return (list.__contains__(self, item) or
-                    self.parent.__contains__(item))
+            return list.__contains__(self, item) or self.parent.__contains__(item)
         else:
             return list.__contains__(self, item)
 
     def __setitem__(self, key, value):
-        raise NotImplementedError('Method is removed as we are not'
-                                  ' able to determine '
-                                  'appropriate logic for it')
+        raise NotImplementedError(
+            "Method is removed as we are not"
+            " able to determine "
+            "appropriate logic for it"
+        )
 
     def __imul__(self, other):
         t0 = len(self)
@@ -341,12 +361,14 @@ class treedict(dict):
     to parent dict instance.
     Extending treedict you will also extend its parent
     """
+
     def __init__(self, iterable=(), parent=None, **kwargs):
         super(treedict, self).__init__(iterable, **kwargs)
         assert isinstance(parent, dict) or parent is None
         self.parent = parent
         if self.parent is not None:
             self.parent.update(self)
+
     # typechecking here works bad
     __setitem__ = withparent(dict.__setitem__)
     update = withparent(dict.update)
@@ -354,11 +376,9 @@ def __init__(self, iterable=(), parent=None, **kwargs):
     def tree_contains(self, item):
         # needed for `add_random_variable` method
         if isinstance(self.parent, treedict):
-            return (dict.__contains__(self, item) or
-                    self.parent.tree_contains(item))
+            return dict.__contains__(self, item) or self.parent.tree_contains(item)
         elif isinstance(self.parent, dict):
-            return (dict.__contains__(self, item) or
-                    self.parent.__contains__(item))
+            return dict.__contains__(self, item) or self.parent.__contains__(item)
         else:
             return dict.__contains__(self, item)
 
@@ -395,19 +415,21 @@ class ValueGradFunction(object):
         gradient. This is None unless `profile=True` was set in the
         kwargs.
     """
-    def __init__(self, cost, grad_vars, extra_vars=None, dtype=None,
-                 casting='no', **kwargs):
+
+    def __init__(
+        self, cost, grad_vars, extra_vars=None, dtype=None, casting="no", **kwargs
+    ):
         if extra_vars is None:
             extra_vars = []
 
         names = [arg.name for arg in grad_vars + extra_vars]
         if any(name is None for name in names):
-            raise ValueError('Arguments must be named.')
+            raise ValueError("Arguments must be named.")
         if len(set(names)) != len(names):
-            raise ValueError('Names of the arguments are not unique.')
+            raise ValueError("Names of the arguments are not unique.")
 
         if cost.ndim > 0:
-            raise ValueError('Cost must be a scalar.')
+            raise ValueError("Cost must be a scalar.")
 
         self._grad_vars = grad_vars
         self._extra_vars = extra_vars
@@ -421,31 +443,35 @@ def __init__(self, cost, grad_vars, extra_vars=None, dtype=None,
         self.dtype = dtype
         for var in self._grad_vars:
             if not np.can_cast(var.dtype, self.dtype, casting):
-                raise TypeError('Invalid dtype for variable %s. Can not '
-                                'cast to %s with casting rule %s.'
-                                % (var.name, self.dtype, casting))
+                raise TypeError(
+                    "Invalid dtype for variable %s. Can not "
+                    "cast to %s with casting rule %s." % (var.name, self.dtype, casting)
+                )
             if not np.issubdtype(var.dtype, np.floating):
-                raise TypeError('Invalid dtype for variable %s. Must be '
-                                'floating point but is %s.'
-                                % (var.name, var.dtype))
+                raise TypeError(
+                    "Invalid dtype for variable %s. Must be "
+                    "floating point but is %s." % (var.name, var.dtype)
+                )
 
         givens = []
         self._extra_vars_shared = {}
         for var in extra_vars:
-            shared = theano.shared(var.tag.test_value, var.name + '_shared__')
+            shared = theano.shared(var.tag.test_value, var.name + "_shared__")
             self._extra_vars_shared[var.name] = shared
             givens.append((var, shared))
 
         self._vars_joined, self._cost_joined = self._build_joined(
-            self._cost, grad_vars, self._ordering.vmap)
+            self._cost, grad_vars, self._ordering.vmap
+        )
 
         grad = tt.grad(self._cost_joined, self._vars_joined)
-        grad.name = '__grad'
+        grad.name = "__grad"
 
         inputs = [self._vars_joined]
 
         self._theano_function = theano.function(
-            inputs, [self._cost_joined, grad], givens=givens, **kwargs)
+            inputs, [self._cost_joined, grad], givens=givens, **kwargs
+        )
 
     def set_extra_values(self, extra_vars):
         self._extra_are_set = True
@@ -454,21 +480,25 @@ def set_extra_values(self, extra_vars):
 
     def get_extra_values(self):
         if not self._extra_are_set:
-            raise ValueError('Extra values are not set.')
+            raise ValueError("Extra values are not set.")
 
-        return {var.name: self._extra_vars_shared[var.name].get_value()
-                for var in self._extra_vars}
+        return {
+            var.name: self._extra_vars_shared[var.name].get_value()
+            for var in self._extra_vars
+        }
 
     def __call__(self, array, grad_out=None, extra_vars=None):
         if extra_vars is not None:
             self.set_extra_values(extra_vars)
 
         if not self._extra_are_set:
-            raise ValueError('Extra values are not set.')
+            raise ValueError("Extra values are not set.")
 
         if array.shape != (self.size,):
-            raise ValueError('Invalid shape for array. Must be %s but is %s.'
-                             % ((self.size,), array.shape))
+            raise ValueError(
+                "Invalid shape for array. Must be %s but is %s."
+                % ((self.size,), array.shape)
+            )
 
         if grad_out is None:
             out = np.empty_like(array)
@@ -497,11 +527,14 @@ def dict_to_array(self, point):
     def array_to_dict(self, array):
         """Convert an array to a dictionary containing the grad_vars."""
         if array.shape != (self.size,):
-            raise ValueError('Array should have shape (%s,) but has %s'
-                             % (self.size, array.shape))
+            raise ValueError(
+                "Array should have shape (%s,) but has %s" % (self.size, array.shape)
+            )
         if array.dtype != self.dtype:
-            raise ValueError('Array has invalid dtype. Should be %s but is %s'
-                             % (self._dtype, self.dtype))
+            raise ValueError(
+                "Array has invalid dtype. Should be %s but is %s"
+                % (self._dtype, self.dtype)
+            )
         point = {}
         for varmap in self._ordering.vmap:
             data = array[varmap.slc].reshape(varmap.shp)
@@ -517,7 +550,7 @@ def array_to_full_dict(self, array):
         return point
 
     def _build_joined(self, cost, args, vmap):
-        args_joined = tt.vector('__args_joined')
+        args_joined = tt.vector("__args_joined")
         args_joined.tag.test_value = np.zeros(self.size, dtype=self.dtype)
 
         joined_slices = {}
@@ -618,22 +651,23 @@ def __init__(self, mean=0, sd=1, name='', model=None):
             CustomModel(mean=1, name='first')
             CustomModel(mean=2, name='second')
     """
+
     def __new__(cls, *args, **kwargs):
         # resolves the parent instance
         instance = super(Model, cls).__new__(cls)
-        if kwargs.get('model') is not None:
-            instance._parent = kwargs.get('model')
+        if kwargs.get("model") is not None:
+            instance._parent = kwargs.get("model")
         elif cls.get_contexts():
             instance._parent = cls.get_contexts()[-1]
         else:
             instance._parent = None
-        theano_config = kwargs.get('theano_config', None)
-        if theano_config is None or 'compute_test_value' not in theano_config:
-            theano_config = {'compute_test_value': 'raise'}
+        theano_config = kwargs.get("theano_config", None)
+        if theano_config is None or "compute_test_value" not in theano_config:
+            theano_config = {"compute_test_value": "raise"}
         instance._theano_config = theano_config
         return instance
 
-    def __init__(self, name='', model=None, theano_config=None):
+    def __init__(self, name="", model=None, theano_config=None):
         self.name = name
         if self.parent is not None:
             self.named_vars = treedict(parent=self.parent.named_vars)
@@ -674,8 +708,7 @@ def isroot(self):
     def bijection(self):
         vars = inputvars(self.cont_vars)
 
-        bij = DictToArrayBijection(ArrayOrdering(vars),
-                                   self.test_point)
+        bij = DictToArrayBijection(ArrayOrdering(vars), self.test_point)
 
         return bij
 
@@ -702,8 +735,9 @@ def logp_dlogp_function(self, grad_vars=None, **kwargs):
         else:
             for var in grad_vars:
                 if var.dtype not in continuous_types:
-                    raise ValueError("Can only compute the gradient of "
-                                     "continuous types: %s" % var)
+                    raise ValueError(
+                        "Can only compute the gradient of " "continuous types: %s" % var
+                    )
         varnames = [var.name for var in grad_vars]
         extra_vars = [var for var in self.free_RVs if var.name not in varnames]
         return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs)
@@ -715,9 +749,9 @@ def logpt(self):
             factors = [var.logpt for var in self.basic_RVs] + self.potentials
             logp = tt.sum([tt.sum(factor) for factor in factors])
             if self.name:
-                logp.name = '__logp_%s' % self.name
+                logp.name = "__logp_%s" % self.name
             else:
-                logp.name = '__logp'
+                logp.name = "__logp"
             return logp
 
     @property
@@ -727,9 +761,9 @@ def logp_nojact(self):
             factors = [var.logp_nojact for var in self.basic_RVs] + self.potentials
             logp = tt.sum([tt.sum(factor) for factor in factors])
             if self.name:
-                logp.name = '__logp_nojac_%s' % self.name
+                logp.name = "__logp_nojac_%s" % self.name
             else:
-                logp.name = '__logp_nojac'
+                logp.name = "__logp_nojac"
             return logp
 
     @property
@@ -769,8 +803,7 @@ def unobserved_RVs(self):
     @property
     def test_point(self):
         """Test point used to check that the model doesn't generate errors"""
-        return Point(((var, var.tag.test_value) for var in self.vars),
-                     model=self)
+        return Point(((var, var.tag.test_value) for var in self.vars), model=self)
 
     @property
     def disc_vars(self):
@@ -804,27 +837,39 @@ def Var(self, name, dist, data=None, total_size=None):
         if data is None:
             if getattr(dist, "transform", None) is None:
                 with self:
-                    var = FreeRV(name=name, distribution=dist,
-                                 total_size=total_size, model=self)
+                    var = FreeRV(
+                        name=name, distribution=dist, total_size=total_size, model=self
+                    )
                 self.free_RVs.append(var)
             else:
                 with self:
-                    var = TransformedRV(name=name, distribution=dist,
-                                        transform=dist.transform,
-                                        total_size=total_size,
-                                        model=self)
-                pm._log.debug('Applied {transform}-transform to {name}'
-                              ' and added transformed {orig_name} to model.'.format(
-                                transform=dist.transform.name,
-                                name=name,
-                                orig_name=get_transformed_name(name, dist.transform)))
+                    var = TransformedRV(
+                        name=name,
+                        distribution=dist,
+                        transform=dist.transform,
+                        total_size=total_size,
+                        model=self,
+                    )
+                pm._log.debug(
+                    "Applied {transform}-transform to {name}"
+                    " and added transformed {orig_name} to model.".format(
+                        transform=dist.transform.name,
+                        name=name,
+                        orig_name=get_transformed_name(name, dist.transform),
+                    )
+                )
                 self.deterministics.append(var)
                 self.add_random_variable(var)
                 return var
         elif isinstance(data, dict):
             with self:
-                var = MultiObservedRV(name=name, data=data, distribution=dist,
-                                      total_size=total_size, model=self)
+                var = MultiObservedRV(
+                    name=name,
+                    data=data,
+                    distribution=dist,
+                    total_size=total_size,
+                    model=self,
+                )
             self.observed_RVs.append(var)
             if var.missing_values:
                 self.free_RVs += var.missing_values
@@ -833,9 +878,13 @@ def Var(self, name, dist, data=None, total_size=None):
                     self.named_vars[v.name] = v
         else:
             with self:
-                var = ObservedRV(name=name, data=data,
-                                 distribution=dist,
-                                 total_size=total_size, model=self)
+                var = ObservedRV(
+                    name=name,
+                    data=data,
+                    distribution=dist,
+                    total_size=total_size,
+                    model=self,
+                )
             self.observed_RVs.append(var)
             if var.missing_values:
                 self.free_RVs.append(var.missing_values)
@@ -848,22 +897,21 @@ def Var(self, name, dist, data=None, total_size=None):
     def add_random_variable(self, var):
         """Add a random variable to the named variables of the model."""
         if self.named_vars.tree_contains(var.name):
-            raise ValueError(
-                "Variable name {} already exists.".format(var.name))
+            raise ValueError("Variable name {} already exists.".format(var.name))
         self.named_vars[var.name] = var
         if not hasattr(self, self.name_of(var.name)):
             setattr(self, self.name_of(var.name), var)
 
     @property
     def prefix(self):
-        return '%s_' % self.name if self.name else ''
+        return "%s_" % self.name if self.name else ""
 
     def name_for(self, name):
         """Checks if name has prefix and adds if needed
         """
         if self.prefix:
             if not name.startswith(self.prefix):
-                return '{}{}'.format(self.prefix, name)
+                return "{}{}".format(self.prefix, name)
             else:
                 return name
         else:
@@ -875,7 +923,7 @@ def name_of(self, name):
         if not self.prefix or not name:
             return name
         elif name.startswith(self.prefix):
-            return name[len(self.prefix):]
+            return name[len(self.prefix) :]
         else:
             return name
 
@@ -902,11 +950,16 @@ def makefn(self, outs, mode=None, *args, **kwargs):
         Compiled Theano function
         """
         with self:
-            return theano.function(self.vars, outs,
-                                   allow_input_downcast=True,
-                                   on_unused_input='ignore',
-                                   accept_inplace=True,
-                                   mode=mode, *args, **kwargs)
+            return theano.function(
+                self.vars,
+                outs,
+                allow_input_downcast=True,
+                on_unused_input="ignore",
+                accept_inplace=True,
+                mode=mode,
+                *args,
+                **kwargs
+            )
 
     def fn(self, outs, mode=None, *args, **kwargs):
         """Compiles a Theano function which returns the values of ``outs``
@@ -994,14 +1047,16 @@ def flatten(self, vars=None, order=None, inputvar=None):
         if order is None:
             order = ArrayOrdering(vars)
         if inputvar is None:
-            inputvar = tt.vector('flat_view', dtype=theano.config.floatX)
-            if theano.config.compute_test_value != 'off':
+            inputvar = tt.vector("flat_view", dtype=theano.config.floatX)
+            if theano.config.compute_test_value != "off":
                 if vars:
                     inputvar.tag.test_value = flatten_list(vars).tag.test_value
                 else:
                     inputvar.tag.test_value = np.asarray([], inputvar.dtype)
-        replacements = {self.named_vars[name]: inputvar[slc].reshape(shape).astype(dtype)
-                        for name, slc, shape, dtype in order.vmap}
+        replacements = {
+            self.named_vars[name]: inputvar[slc].reshape(shape).astype(dtype)
+            for name, slc, shape, dtype in order.vmap
+        }
         view = {vm.var: vm for vm in order.vmap}
         flat_view = FlatView(inputvar, replacements, view)
         return flat_view
@@ -1024,21 +1079,28 @@ def check_test_point(self, test_point=None, round_vals=2):
         if test_point is None:
             test_point = self.test_point
 
-        return Series({RV.name:np.round(RV.logp(self.test_point), round_vals) for RV in self.basic_RVs},
-            name='Log-probability of test_point')
+        return Series(
+            {
+                RV.name: np.round(RV.logp(self.test_point), round_vals)
+                for RV in self.basic_RVs
+            },
+            name="Log-probability of test_point",
+        )
 
     def _repr_latex_(self, name=None, dist=None):
         tex_vars = []
         for rv in itertools.chain(self.unobserved_RVs, self.observed_RVs):
             rv_tex = rv.__latex__()
             if rv_tex is not None:
-                array_rv = rv_tex.replace(r'\sim', r'&\sim &').strip('$')
+                array_rv = rv_tex.replace(r"\sim", r"&\sim &").strip("$")
                 tex_vars.append(array_rv)
-        return r'''$$
+        return r"""$$
             \begin{{array}}{{rcl}}
             {}
             \end{{array}}
-            $$'''.format('\\\\'.join(tex_vars))
+            $$""".format(
+            "\\\\".join(tex_vars)
+        )
 
     __latex__ = _repr_latex_
 
@@ -1086,15 +1148,15 @@ def Point(*args, **kwargs):
     args, kwargs
         arguments to build a dict
     """
-    model = modelcontext(kwargs.pop('model', None))
+    model = modelcontext(kwargs.pop("model", None))
     args = list(args)
     try:
         d = dict(*args, **kwargs)
     except Exception as e:
-        raise TypeError(
-            "can't turn {} and {} into a dict. {}".format(args, kwargs, e))
-    return dict((str(k), np.array(v)) for k, v in d.items()
-                if str(k) in map(str, model.vars))
+        raise TypeError("can't turn {} and {} into a dict. {}".format(args, kwargs, e))
+    return dict(
+        (str(k), np.array(v)) for k, v in d.items() if str(k) in map(str, model.vars)
+    )
 
 
 class FastPointFunc(object):
@@ -1119,6 +1181,7 @@ def __call__(self, *args, **kwargs):
         point = Point(model=self.model, *args, **kwargs)
         return self.f(**point)
 
+
 compilef = fastfn
 
 
@@ -1147,43 +1210,65 @@ def _get_scaling(total_size, shape, ndim):
             denom = 1
         coef = floatX(total_size) / floatX(denom)
     elif isinstance(total_size, (list, tuple)):
-        if not all(isinstance(i, int) for i in total_size if (i is not Ellipsis and i is not None)):
-            raise TypeError('Unrecognized `total_size` type, expected '
-                            'int or list of ints, got %r' % total_size)
+        if not all(
+            isinstance(i, int)
+            for i in total_size
+            if (i is not Ellipsis and i is not None)
+        ):
+            raise TypeError(
+                "Unrecognized `total_size` type, expected "
+                "int or list of ints, got %r" % total_size
+            )
         if Ellipsis in total_size:
             sep = total_size.index(Ellipsis)
             begin = total_size[:sep]
-            end = total_size[sep+1:]
+            end = total_size[sep + 1 :]
             if Ellipsis in end:
-                raise ValueError('Double Ellipsis in `total_size` is restricted, got %r' % total_size)
+                raise ValueError(
+                    "Double Ellipsis in `total_size` is restricted, got %r" % total_size
+                )
         else:
             begin = total_size
             end = []
         if (len(begin) + len(end)) > ndim:
-            raise ValueError('Length of `total_size` is too big, '
-                             'number of scalings is bigger that ndim, got %r' % total_size)
+            raise ValueError(
+                "Length of `total_size` is too big, "
+                "number of scalings is bigger that ndim, got %r" % total_size
+            )
         elif (len(begin) + len(end)) == 0:
             return floatX(1)
         if len(end) > 0:
-            shp_end = shape[-len(end):]
+            shp_end = shape[-len(end) :]
         else:
             shp_end = np.asarray([])
-        shp_begin = shape[:len(begin)]
-        begin_coef = [floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None]
+        shp_begin = shape[: len(begin)]
+        begin_coef = [
+            floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None
+        ]
         end_coef = [floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None]
         coefs = begin_coef + end_coef
         coef = tt.prod(coefs)
     else:
-        raise TypeError('Unrecognized `total_size` type, expected '
-                        'int or list of ints, got %r' % total_size)
+        raise TypeError(
+            "Unrecognized `total_size` type, expected "
+            "int or list of ints, got %r" % total_size
+        )
     return tt.as_tensor(floatX(coef))
 
 
 class FreeRV(Factor, TensorVariable):
     """Unobserved random variable that a model is specified in terms of."""
 
-    def __init__(self, type=None, owner=None, index=None, name=None,
-                 distribution=None, total_size=None, model=None):
+    def __init__(
+        self,
+        type=None,
+        owner=None,
+        index=None,
+        name=None,
+        distribution=None,
+        total_size=None,
+        model=None,
+    ):
         """
         Parameters
         ----------
@@ -1203,8 +1288,9 @@ def __init__(self, type=None, owner=None, index=None, name=None,
             self.dshape = tuple(distribution.shape)
             self.dsize = int(np.prod(distribution.shape))
             self.distribution = distribution
-            self.tag.test_value = np.ones(
-                distribution.shape, distribution.dtype) * distribution.default()
+            self.tag.test_value = (
+                np.ones(distribution.shape, distribution.dtype) * distribution.default()
+            )
             self.logp_elemwiset = distribution.logp(self)
             # The logp might need scaling in minibatches.
             # This is done in `Factor`.
@@ -1214,9 +1300,12 @@ def __init__(self, type=None, owner=None, index=None, name=None,
             self.model = model
             self.scaling = _get_scaling(total_size, self.shape, self.ndim)
 
-            incorporate_methods(source=distribution, destination=self,
-                                methods=['random'],
-                                wrapper=InstanceMethod)
+            incorporate_methods(
+                source=distribution,
+                destination=self,
+                methods=["random"],
+                wrapper=InstanceMethod,
+            )
 
     def _repr_latex_(self, name=None, dist=None):
         if self.distribution is None:
@@ -1236,12 +1325,12 @@ def init_value(self):
 
 
 def pandas_to_array(data):
-    if hasattr(data, 'values'):  # pandas
+    if hasattr(data, "values"):  # pandas
         if data.isnull().any().any():  # missing values
             ret = np.ma.MaskedArray(data.values, data.isnull().values)
         else:
             ret = data.values
-    elif hasattr(data, 'mask'):
+    elif hasattr(data, "mask"):
         ret = data
     elif isinstance(data, theano.gof.graph.Variable):
         ret = data
@@ -1258,17 +1347,22 @@ def as_tensor(data, name, model, distribution):
     dtype = distribution.dtype
     data = pandas_to_array(data).astype(dtype)
 
-    if hasattr(data, 'mask'):
+    if hasattr(data, "mask"):
         from .distributions import NoDistribution
+
         testval = np.broadcast_to(distribution.default(), data.shape)[data.mask]
-        fakedist = NoDistribution.dist(shape=data.mask.sum(), dtype=dtype,
-                                       testval=testval, parent_dist=distribution)
-        missing_values = FreeRV(name=name + '_missing', distribution=fakedist,
-                                model=model)
+        fakedist = NoDistribution.dist(
+            shape=data.mask.sum(),
+            dtype=dtype,
+            testval=testval,
+            parent_dist=distribution,
+        )
+        missing_values = FreeRV(
+            name=name + "_missing", distribution=fakedist, model=model
+        )
         constant = tt.as_tensor_variable(data.filled())
 
-        dataTensor = tt.set_subtensor(
-            constant[data.mask.nonzero()], missing_values)
+        dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], missing_values)
         dataTensor.missing_values = missing_values
         return dataTensor
     elif sps.issparse(data):
@@ -1286,8 +1380,17 @@ class ObservedRV(Factor, TensorVariable):
     Potentially partially observed.
     """
 
-    def __init__(self, type=None, owner=None, index=None, name=None, data=None,
-                 distribution=None, total_size=None, model=None):
+    def __init__(
+        self,
+        type=None,
+        owner=None,
+        index=None,
+        name=None,
+        data=None,
+        distribution=None,
+        total_size=None,
+        model=None,
+    ):
         """
         Parameters
         ----------
@@ -1301,7 +1404,7 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None,
         """
         from .distributions import TensorType
 
-        if hasattr(data, 'type') and isinstance(data.type, tt.TensorType):
+        if hasattr(data, "type") and isinstance(data.type, tt.TensorType):
             type = data.type
 
         if type is None:
@@ -1326,8 +1429,7 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None,
             self.distribution = distribution
 
             # make this RV a view on the combined missing/nonmissing array
-            theano.gof.Apply(theano.compile.view_op,
-                             inputs=[data], outputs=[self])
+            theano.gof.Apply(theano.compile.view_op, inputs=[data], outputs=[self])
             self.tag.test_value = theano.compile.view_op(data).tag.test_value
             self.scaling = _get_scaling(total_size, data.shape, data.ndim)
 
@@ -1366,11 +1468,16 @@ def __init__(self, name, data, distribution, total_size=None, model=None):
             needed for upscaling logp
         """
         self.name = name
-        self.data = {name: as_tensor(data, name, model, distribution)
-                     for name, data in data.items()}
-
-        self.missing_values = [datum.missing_values for datum in self.data.values()
-                               if datum.missing_values is not None]
+        self.data = {
+            name: as_tensor(data, name, model, distribution)
+            for name, data in data.items()
+        }
+
+        self.missing_values = [
+            datum.missing_values
+            for datum in self.data.values()
+            if datum.missing_values is not None
+        ]
         self.logp_elemwiset = distribution.logp(**self.data)
         # The logp might need scaling in minibatches.
         # This is done in `Factor`.
@@ -1379,7 +1486,9 @@ def __init__(self, name, data, distribution, total_size=None, model=None):
         self.total_size = total_size
         self.model = model
         self.distribution = distribution
-        self.scaling = _get_scaling(total_size, self.logp_elemwiset.shape, self.logp_elemwiset.ndim)
+        self.scaling = _get_scaling(
+            total_size, self.logp_elemwiset.shape, self.logp_elemwiset.ndim
+        )
 
 
 def _walk_up_rv(rv):
@@ -1391,15 +1500,18 @@ def _walk_up_rv(rv):
             all_rvs.extend(_walk_up_rv(parent))
     else:
         if rv.name:
-            all_rvs.append(r'\text{%s}' % rv.name)
+            all_rvs.append(r"\text{%s}" % rv.name)
         else:
-            all_rvs.append(r'\text{Constant}')
+            all_rvs.append(r"\text{Constant}")
     return all_rvs
 
 
 def _latex_repr_rv(rv):
     """Make latex string for a Deterministic variable"""
-    return r'$\text{%s} \sim \text{Deterministic}(%s)$' % (rv.name, r',~'.join(_walk_up_rv(rv)))
+    return r"$\text{%s} \sim \text{Deterministic}(%s)$" % (
+        rv.name,
+        r",~".join(_walk_up_rv(rv)),
+    )
 
 
 def Deterministic(name, var, model=None):
@@ -1456,9 +1568,17 @@ class TransformedRV(TensorVariable):
         needed for upscaling logp
     """
 
-    def __init__(self, type=None, owner=None, index=None, name=None,
-                 distribution=None, model=None, transform=None,
-                 total_size=None):
+    def __init__(
+        self,
+        type=None,
+        owner=None,
+        index=None,
+        name=None,
+        distribution=None,
+        model=None,
+        transform=None,
+        total_size=None,
+    ):
         if type is None:
             type = distribution.type
         super(TransformedRV, self).__init__(type, owner, index, name)
@@ -1474,17 +1594,20 @@ def __init__(self, type=None, owner=None, index=None, name=None,
             transformed_name = get_transformed_name(name, transform)
 
             self.transformed = model.Var(
-                transformed_name, transform.apply(distribution), total_size=total_size)
+                transformed_name, transform.apply(distribution), total_size=total_size
+            )
 
             normalRV = transform.backward(self.transformed)
 
-            theano.Apply(theano.compile.view_op, inputs=[
-                         normalRV], outputs=[self])
+            theano.Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self])
             self.tag.test_value = normalRV.tag.test_value
             self.scaling = _get_scaling(total_size, self.shape, self.ndim)
-            incorporate_methods(source=distribution, destination=self,
-                                methods=['random'],
-                                wrapper=InstanceMethod)
+            incorporate_methods(
+                source=distribution,
+                destination=self,
+                methods=["random"],
+                wrapper=InstanceMethod,
+            )
 
     def _repr_latex_(self, name=None, dist=None):
         if self.distribution is None:
diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py
index e68a28cb17..269702a54d 100644
--- a/pymc3/model_graph.py
+++ b/pymc3/model_graph.py
@@ -14,21 +14,27 @@ def powerset(iterable):
     powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
     """
     s = list(iterable)
-    return itertools.chain.from_iterable(itertools.combinations(s, r) for r in range(1, len(s)+1))
+    return itertools.chain.from_iterable(
+        itertools.combinations(s, r) for r in range(1, len(s) + 1)
+    )
 
 
 class ModelGraph(object):
     def __init__(self, model):
         self.model = model
-        self.var_names = get_default_varnames(self.model.named_vars, include_transformed=False)
+        self.var_names = get_default_varnames(
+            self.model.named_vars, include_transformed=False
+        )
         self.var_list = self.model.named_vars.values()
-        self.transform_map = {v.transformed: v.name for v in self.var_list if hasattr(v, 'transformed')}
+        self.transform_map = {
+            v.transformed: v.name for v in self.var_list if hasattr(v, "transformed")
+        }
         self._deterministics = None
 
     def get_deterministics(self, var):
         """Compute the deterministic nodes of the graph"""
         deterministics = []
-        attrs = ('transformed', 'logpt')
+        attrs = ("transformed", "logpt")
         for v in self.var_list:
             if v != var and all(not hasattr(v, attr) for attr in attrs):
                 deterministics.append(v)
@@ -36,7 +42,13 @@ def get_deterministics(self, var):
 
     def _ancestors(self, var, func, blockers=None):
         """Get ancestors of a function that are also named PyMC3 variables"""
-        return set([j for j in ancestors([func], blockers=blockers) if j in self.var_list and j != var])
+        return set(
+            [
+                j
+                for j in ancestors([func], blockers=blockers)
+                if j in self.var_list and j != var
+            ]
+        )
 
     def _get_ancestors(self, var, func):
         """Get all ancestors of a function, doing some accounting for deterministics
@@ -51,12 +63,14 @@ def _get_ancestors(self, var, func):
         # Usual case
         if upstream == self._ancestors(var, func, blockers=upstream):
             return upstream
-        else: # deterministic accounting
+        else:  # deterministic accounting
             for d in powerset(upstream):
                 blocked = self._ancestors(var, func, blockers=d)
                 if set(d) == blocked:
                     return d
-        raise RuntimeError('Could not traverse graph. Consider raising an issue with developers.')
+        raise RuntimeError(
+            "Could not traverse graph. Consider raising an issue with developers."
+        )
 
     def _filter_parents(self, var, parents):
         """Get direct parents of a var, as strings"""
@@ -70,14 +84,14 @@ def _filter_parents(self, var, parents):
                 if self.transform_map[p] != var.name:
                     keep.add(self.transform_map[p])
             else:
-                raise AssertionError('Do not know what to do with {}'.format(str(p)))
+                raise AssertionError("Do not know what to do with {}".format(str(p)))
         return keep
 
     def get_parents(self, var):
         """Get the named nodes that are direct inputs to the var"""
-        if hasattr(var, 'transformed'):
+        if hasattr(var, "transformed"):
             func = var.transformed.logpt
-        elif hasattr(var, 'logpt'):
+        elif hasattr(var, "logpt"):
             func = var.logpt
         else:
             func = var
@@ -99,18 +113,22 @@ def _make_node(self, var_name, graph):
         # styling for node
         attrs = {}
         if isinstance(v, pm.model.ObservedRV):
-            attrs['style'] = 'filled'
+            attrs["style"] = "filled"
 
         # Get name for node
-        if hasattr(v, 'distribution'):
+        if hasattr(v, "distribution"):
             distribution = v.distribution.__class__.__name__
         else:
-            distribution = 'Deterministic'
-            attrs['shape'] = 'box'
+            distribution = "Deterministic"
+            attrs["shape"] = "box"
 
-        graph.node(var_name,
-                '{var_name} ~ {distribution}'.format(var_name=var_name, distribution=distribution),
-                **attrs)
+        graph.node(
+            var_name,
+            "{var_name} ~ {distribution}".format(
+                var_name=var_name, distribution=distribution
+            ),
+            **attrs
+        )
 
     def get_plates(self):
         """ Rough but surprisingly accurate plate detection.
@@ -125,9 +143,9 @@ def get_plates(self):
         plates = {}
         for var_name in self.var_names:
             v = self.model[var_name]
-            if hasattr(v, 'observations'):
+            if hasattr(v, "observations"):
                 shape = v.observations.shape
-            elif hasattr(v, 'dshape'):
+            elif hasattr(v, "dshape"):
                 shape = v.dshape
             else:
                 shape = v.tag.test_value.shape
@@ -148,19 +166,21 @@ def make_graph(self):
         try:
             import graphviz
         except ImportError:
-            raise ImportError('This function requires the python library graphviz, along with binaries. '
-                              'The easiest way to install all of this is by running\n\n'
-                              '\tconda install -c conda-forge python-graphviz')
+            raise ImportError(
+                "This function requires the python library graphviz, along with binaries. "
+                "The easiest way to install all of this is by running\n\n"
+                "\tconda install -c conda-forge python-graphviz"
+            )
         graph = graphviz.Digraph(self.model.name)
         for shape, var_names in self.get_plates().items():
-            label = ' x '.join(map('{:,d}'.format, shape))
+            label = " x ".join(map("{:,d}".format, shape))
             if label:
                 # must be preceded by 'cluster' to get a box around it
-                with graph.subgraph(name='cluster' + label) as sub:
+                with graph.subgraph(name="cluster" + label) as sub:
                     for var_name in var_names:
                         self._make_node(var_name, sub)
                     # plate label goes bottom right
-                    sub.attr(label=label, labeljust='r', labelloc='b', style='rounded')
+                    sub.attr(label=label, labeljust="r", labelloc="b", style="rounded")
             else:
                 for var_name in var_names:
                     self._make_node(var_name, graph)
diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py
index 1c4952934e..a5736da55c 100644
--- a/pymc3/parallel_sampling.py
+++ b/pymc3/parallel_sampling.py
@@ -11,7 +11,7 @@
 
 from . import theanof
 
-logger = logging.getLogger('pymc3')
+logger = logging.getLogger("pymc3")
 
 
 # Taken from https://hg.python.org/cpython/rev/c4f92b597074
@@ -26,7 +26,7 @@ def __str__(self):
 class ExceptionWithTraceback:
     def __init__(self, exc, tb):
         tb = traceback.format_exception(type(exc), exc, tb)
-        tb = ''.join(tb)
+        tb = "".join(tb)
         self.exc = exc
         self.tb = '\n"""\n%s"""' % tb
 
@@ -54,8 +54,8 @@ class _Process(multiprocessing.Process):
     We communicate with the main process using a pipe,
     and send finished samples using shared memory.
     """
-    def __init__(self, name, msg_pipe, step_method, shared_point,
-                 draws, tune, seed):
+
+    def __init__(self, name, msg_pipe, step_method, shared_point, draws, tune, seed):
         super(_Process, self).__init__(daemon=True, name=name)
         self._msg_pipe = msg_pipe
         self._step_method = step_method
@@ -75,7 +75,7 @@ def run(self):
             pass
         except BaseException as e:
             e = ExceptionWithTraceback(e, e.__traceback__)
-            self._msg_pipe.send(('error', e))
+            self._msg_pipe.send(("error", e))
         finally:
             self._msg_pipe.close()
 
@@ -103,10 +103,10 @@ def _start_loop(self):
         tuning = True
 
         msg = self._recv_msg()
-        if msg[0] == 'abort':
+        if msg[0] == "abort":
             raise KeyboardInterrupt()
-        if msg[0] != 'start':
-            raise ValueError('Unexpected msg ' + msg[0])
+        if msg[0] != "start":
+            raise ValueError("Unexpected msg " + msg[0])
 
         while True:
             if draw < self._draws + self._tune:
@@ -119,9 +119,9 @@ def _start_loop(self):
                 tuning = False
 
             msg = self._recv_msg()
-            if msg[0] == 'abort':
+            if msg[0] == "abort":
                 raise KeyboardInterrupt()
-            elif msg[0] == 'write_next':
+            elif msg[0] == "write_next":
                 self._write_point(point)
                 is_last = draw + 1 == self._draws + self._tune
                 if is_last:
@@ -129,10 +129,11 @@ def _start_loop(self):
                 else:
                     warns = None
                 self._msg_pipe.send(
-                    ('writing_done', is_last, draw, tuning, stats, warns))
+                    ("writing_done", is_last, draw, tuning, stats, warns)
+                )
                 draw += 1
             else:
-                raise ValueError('Unknown message ' + msg[0])
+                raise ValueError("Unknown message " + msg[0])
 
     def _compute_point(self):
         if self._step_method.generates_stats:
@@ -143,7 +144,7 @@ def _compute_point(self):
         return point, stats
 
     def _collect_warnings(self):
-        if hasattr(self._step_method, 'warnings'):
+        if hasattr(self._step_method, "warnings"):
             return self._step_method.warnings()
         else:
             return []
@@ -151,6 +152,7 @@ def _collect_warnings(self):
 
 class ProcessAdapter(object):
     """Control a Chain process from the main thread."""
+
     def __init__(self, draws, tune, step_method, chain, seed, start):
         self.chain = chain
         process_name = "worker_chain_%s" % chain
@@ -164,9 +166,9 @@ def __init__(self, draws, tune, step_method, chain, seed, start):
                 size *= int(dim)
             size *= dtype.itemsize
             if size != ctypes.c_size_t(size).value:
-                raise ValueError('Variable %s is too large' % name)
+                raise ValueError("Variable %s is too large" % name)
 
-            array = multiprocessing.sharedctypes.RawArray('c', size)
+            array = multiprocessing.sharedctypes.RawArray("c", size)
             self._shared_point[name] = array
             array_np = np.frombuffer(array, dtype).reshape(shape)
             array_np[...] = start[name]
@@ -176,8 +178,14 @@ def __init__(self, draws, tune, step_method, chain, seed, start):
         self._num_samples = 0
 
         self._process = _Process(
-            process_name, remote_conn, step_method, self._shared_point,
-            draws, tune, seed)
+            process_name,
+            remote_conn,
+            step_method,
+            self._shared_point,
+            draws,
+            tune,
+            seed,
+        )
         # We fork right away, so that the main process can start tqdm threads
         self._process.start()
 
@@ -191,14 +199,14 @@ def shared_point_view(self):
         return self._point
 
     def start(self):
-        self._msg_pipe.send(('start',))
+        self._msg_pipe.send(("start",))
 
     def write_next(self):
         self._readable = False
-        self._msg_pipe.send(('write_next',))
+        self._msg_pipe.send(("write_next",))
 
     def abort(self):
-        self._msg_pipe.send(('abort',))
+        self._msg_pipe.send(("abort",))
 
     def join(self, timeout=None):
         self._process.join(timeout)
@@ -209,24 +217,24 @@ def terminate(self):
     @staticmethod
     def recv_draw(processes, timeout=3600):
         if not processes:
-            raise ValueError('No processes.')
+            raise ValueError("No processes.")
         pipes = [proc._msg_pipe for proc in processes]
         ready = multiprocessing.connection.wait(pipes)
         if not ready:
-            raise multiprocessing.TimeoutError('No message from samplers.')
+            raise multiprocessing.TimeoutError("No message from samplers.")
         idxs = {id(proc._msg_pipe): proc for proc in processes}
         proc = idxs[id(ready[0])]
         msg = ready[0].recv()
 
-        if msg[0] == 'error':
+        if msg[0] == "error":
             old = msg[1]
-            six.raise_from(RuntimeError('Chain %s failed.' % proc.chain), old)
-        elif msg[0] == 'writing_done':
+            six.raise_from(RuntimeError("Chain %s failed." % proc.chain), old)
+        elif msg[0] == "writing_done":
             proc._readable = True
             proc._num_samples += 1
             return (proc,) + msg[1:]
         else:
-            raise ValueError('Sampler sent bad message.')
+            raise ValueError("Sampler sent bad message.")
 
     @staticmethod
     def terminate_all(processes, patience=2):
@@ -244,8 +252,10 @@ def terminate_all(processes, patience=2):
                     raise multiprocessing.TimeoutError()
                 process.join(timeout)
         except multiprocessing.TimeoutError:
-            logger.warn('Chain processes did not terminate as expected. '
-                        'Terminating forcefully...')
+            logger.warn(
+                "Chain processes did not terminate as expected. "
+                "Terminating forcefully..."
+            )
             for process in processes:
                 process.terminate()
             for process in processes:
@@ -253,25 +263,35 @@ def terminate_all(processes, patience=2):
 
 
 Draw = namedtuple(
-    'Draw',
-    ['chain', 'is_last', 'draw_idx', 'tuning', 'stats', 'point', 'warnings']
+    "Draw", ["chain", "is_last", "draw_idx", "tuning", "stats", "point", "warnings"]
 )
 
 
 class ParallelSampler(object):
-    def __init__(self, draws, tune, chains, cores, seeds, start_points,
-                 step_method, start_chain_num=0, progressbar=True):
+    def __init__(
+        self,
+        draws,
+        tune,
+        chains,
+        cores,
+        seeds,
+        start_points,
+        step_method,
+        start_chain_num=0,
+        progressbar=True,
+    ):
         if progressbar:
             import tqdm
+
             tqdm_ = tqdm.tqdm
 
         if any(len(arg) != chains for arg in [seeds, start_points]):
-            raise ValueError(
-                'Number of seeds and start_points must be %s.' % chains)
+            raise ValueError("Number of seeds and start_points must be %s." % chains)
 
         self._samplers = [
-            ProcessAdapter(draws, tune, step_method,
-                           chain + start_chain_num, seed, start)
+            ProcessAdapter(
+                draws, tune, step_method, chain + start_chain_num, seed, start
+            )
             for chain, seed, start in zip(range(chains), seeds, start_points)
         ]
 
@@ -286,8 +306,10 @@ def __init__(self, draws, tune, chains, cores, seeds, start_points,
         self._progress = None
         if progressbar:
             self._progress = tqdm_(
-                total=chains * (draws + tune), unit='draws',
-                desc='Sampling %s chains' % chains)
+                total=chains * (draws + tune),
+                unit="draws",
+                desc="Sampling %s chains" % chains,
+            )
 
     def _make_active(self):
         while self._inactive and len(self._active) < self._max_active:
@@ -298,7 +320,7 @@ def _make_active(self):
 
     def __iter__(self):
         if not self._in_context:
-            raise ValueError('Use ParallelSampler as context manager.')
+            raise ValueError("Use ParallelSampler as context manager.")
         self._make_active()
 
         while self._active:
@@ -317,8 +339,7 @@ def __iter__(self):
             # and only call proc.write_next() after the yield returns.
             # This seems to be faster overally though, as the worker
             # loses less time waiting.
-            point = {name: val.copy()
-                     for name, val in proc.shared_point_view.items()}
+            point = {name: val.copy() for name, val in proc.shared_point_view.items()}
 
             # Already called for new proc in _make_active
             if not is_last:
diff --git a/pymc3/plots/artists.py b/pymc3/plots/artists.py
index 81c9dbae51..192c0328db 100644
--- a/pymc3/plots/artists.py
+++ b/pymc3/plots/artists.py
@@ -13,17 +13,18 @@ def _histplot_bins(column, bins=100):
     return range(col_min, col_max + 2, max((col_max - col_min) // bins, 1))
 
 
-def histplot_op(ax, data, alpha=.35):
+def histplot_op(ax, data, alpha=0.35):
     """Add a histogram for each column of the data to the provided axes."""
     hs = []
     for column in data.T:
-        hs.append(ax.hist(column, bins=_histplot_bins(
-                  column), alpha=alpha, align='left'))
+        hs.append(
+            ax.hist(column, bins=_histplot_bins(column), alpha=alpha, align="left")
+        )
     ax.set_xlim(np.min(data) - 0.5, np.max(data) + 0.5)
     return hs
 
 
-def kdeplot_op(ax, data, bw, prior=None, prior_alpha=1, prior_style='--'):
+def kdeplot_op(ax, data, bw, prior=None, prior_alpha=1, prior_style="--"):
     """Get a list of density and likelihood plots, if a prior is provided."""
     ls = []
     pls = []
@@ -34,26 +35,41 @@ def kdeplot_op(ax, data, bw, prior=None, prior_alpha=1, prior_style='--'):
             x = np.linspace(l, u, len(density))
             if prior is not None:
                 p = prior.logp(x).eval()
-                pls.append(ax.plot(x, np.exp(p),
-                                   alpha=prior_alpha, ls=prior_style))
+                pls.append(ax.plot(x, np.exp(p), alpha=prior_alpha, ls=prior_style))
 
             ls.append(ax.plot(x, density))
         except ValueError:
             errored.append(str(i))
 
     if errored:
-        ax.text(.27, .47, 'WARNING: KDE plot failed for: ' + ','.join(errored),
-                bbox={'facecolor': 'red', 'alpha': 0.5, 'pad': 10},
-                style='italic')
+        ax.text(
+            0.27,
+            0.47,
+            "WARNING: KDE plot failed for: " + ",".join(errored),
+            bbox={"facecolor": "red", "alpha": 0.5, "pad": 10},
+            style="italic",
+        )
 
     return ls, pls
 
 
-def plot_posterior_op(trace_values, ax, bw, kde_plot, point_estimate, round_to,
-                      alpha_level, ref_val, rope, text_size=16, **kwargs):
+def plot_posterior_op(
+    trace_values,
+    ax,
+    bw,
+    kde_plot,
+    point_estimate,
+    round_to,
+    alpha_level,
+    ref_val,
+    rope,
+    text_size=16,
+    **kwargs
+):
     """Artist to draw posterior."""
+
     def format_as_percent(x, round_to=0):
-        return '{0:.{1:d}f}%'.format(100 * x, round_to)
+        return "{0:.{1:d}f}%".format(100 * x, round_to)
 
     def display_ref_val(ref_val):
         less_than_ref_probability = (trace_values < ref_val).mean()
@@ -61,80 +77,117 @@ def display_ref_val(ref_val):
         ref_in_posterior = "{} <{:g}< {}".format(
             format_as_percent(less_than_ref_probability, 1),
             ref_val,
-            format_as_percent(greater_than_ref_probability, 1))
-        ax.axvline(ref_val, ymin=0.02, ymax=.75, color='g',
-                   linewidth=4, alpha=0.65)
-        ax.text(trace_values.mean(), plot_height * 0.6, ref_in_posterior,
-                size=text_size, horizontalalignment='center')
+            format_as_percent(greater_than_ref_probability, 1),
+        )
+        ax.axvline(ref_val, ymin=0.02, ymax=0.75, color="g", linewidth=4, alpha=0.65)
+        ax.text(
+            trace_values.mean(),
+            plot_height * 0.6,
+            ref_in_posterior,
+            size=text_size,
+            horizontalalignment="center",
+        )
 
     def display_rope(rope):
-        ax.plot(rope, (plot_height * 0.02, plot_height * 0.02),
-                linewidth=20, color='r', alpha=0.75)
-        text_props = dict(size=text_size, horizontalalignment='center', color='r')
+        ax.plot(
+            rope,
+            (plot_height * 0.02, plot_height * 0.02),
+            linewidth=20,
+            color="r",
+            alpha=0.75,
+        )
+        text_props = dict(size=text_size, horizontalalignment="center", color="r")
         ax.text(rope[0], plot_height * 0.14, rope[0], **text_props)
         ax.text(rope[1], plot_height * 0.14, rope[1], **text_props)
 
     def display_point_estimate():
         if not point_estimate:
             return
-        if point_estimate not in ('mode', 'mean', 'median'):
-            raise ValueError(
-                "Point Estimate should be in ('mode','mean','median')")
-        if point_estimate == 'mean':
+        if point_estimate not in ("mode", "mean", "median"):
+            raise ValueError("Point Estimate should be in ('mode','mean','median')")
+        if point_estimate == "mean":
             point_value = trace_values.mean()
-        elif point_estimate == 'mode':
+        elif point_estimate == "mode":
             if isinstance(trace_values[0], float):
                 density, l, u = fast_kde(trace_values, bw)
                 x = np.linspace(l, u, len(density))
                 point_value = x[np.argmax(density)]
             else:
                 point_value = mode(trace_values.round(round_to))[0][0]
-        elif point_estimate == 'median':
+        elif point_estimate == "median":
             point_value = np.median(trace_values)
-        point_text = '{point_estimate}={point_value:.{round_to}f}'.format(point_estimate=point_estimate,
-                                                                          point_value=point_value, round_to=round_to)
-
-        ax.text(point_value, plot_height * 0.8, point_text,
-                size=text_size, horizontalalignment='center')
+        point_text = "{point_estimate}={point_value:.{round_to}f}".format(
+            point_estimate=point_estimate, point_value=point_value, round_to=round_to
+        )
+
+        ax.text(
+            point_value,
+            plot_height * 0.8,
+            point_text,
+            size=text_size,
+            horizontalalignment="center",
+        )
 
     def display_hpd():
         hpd_intervals = hpd(trace_values, alpha=alpha_level)
-        ax.plot(hpd_intervals, (plot_height * 0.02,
-                                plot_height * 0.02), linewidth=4, color='k')
-        ax.text(hpd_intervals[0], plot_height * 0.07,
-                hpd_intervals[0].round(round_to),
-                size=text_size, horizontalalignment='right')
-        ax.text(hpd_intervals[1], plot_height * 0.07,
-                hpd_intervals[1].round(round_to),
-                size=text_size, horizontalalignment='left')
-        ax.text((hpd_intervals[0] + hpd_intervals[1]) / 2, plot_height * 0.2,
-                format_as_percent(1 - alpha_level) + ' HPD',
-                size=text_size, horizontalalignment='center')
+        ax.plot(
+            hpd_intervals,
+            (plot_height * 0.02, plot_height * 0.02),
+            linewidth=4,
+            color="k",
+        )
+        ax.text(
+            hpd_intervals[0],
+            plot_height * 0.07,
+            hpd_intervals[0].round(round_to),
+            size=text_size,
+            horizontalalignment="right",
+        )
+        ax.text(
+            hpd_intervals[1],
+            plot_height * 0.07,
+            hpd_intervals[1].round(round_to),
+            size=text_size,
+            horizontalalignment="left",
+        )
+        ax.text(
+            (hpd_intervals[0] + hpd_intervals[1]) / 2,
+            plot_height * 0.2,
+            format_as_percent(1 - alpha_level) + " HPD",
+            size=text_size,
+            horizontalalignment="center",
+        )
 
     def format_axes():
         ax.yaxis.set_ticklabels([])
-        ax.spines['top'].set_visible(False)
-        ax.spines['right'].set_visible(False)
-        ax.spines['left'].set_visible(False)
-        ax.spines['bottom'].set_visible(True)
-        ax.yaxis.set_ticks_position('none')
-        ax.xaxis.set_ticks_position('bottom')
-        ax.tick_params(axis='x', direction='out', width=1, length=3,
-                       color='0.5', labelsize=text_size)
-        ax.spines['bottom'].set_color('0.5')
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        ax.spines["left"].set_visible(False)
+        ax.spines["bottom"].set_visible(True)
+        ax.yaxis.set_ticks_position("none")
+        ax.xaxis.set_ticks_position("bottom")
+        ax.tick_params(
+            axis="x",
+            direction="out",
+            width=1,
+            length=3,
+            color="0.5",
+            labelsize=text_size,
+        )
+        ax.spines["bottom"].set_color("0.5")
 
     def set_key_if_doesnt_exist(d, key, value):
         if key not in d:
             d[key] = value
 
     if kde_plot and isinstance(trace_values[0], float):
-        kdeplot(trace_values, alpha=kwargs.pop('alpha', 0.35), bw=bw, ax=ax, **kwargs)
+        kdeplot(trace_values, alpha=kwargs.pop("alpha", 0.35), bw=bw, ax=ax, **kwargs)
 
     else:
-        set_key_if_doesnt_exist(kwargs, 'bins', 30)
-        set_key_if_doesnt_exist(kwargs, 'edgecolor', 'w')
-        set_key_if_doesnt_exist(kwargs, 'align', 'right')
-        set_key_if_doesnt_exist(kwargs, 'color', '#87ceeb')
+        set_key_if_doesnt_exist(kwargs, "bins", 30)
+        set_key_if_doesnt_exist(kwargs, "edgecolor", "w")
+        set_key_if_doesnt_exist(kwargs, "align", "right")
+        set_key_if_doesnt_exist(kwargs, "color", "#87ceeb")
         ax.hist(trace_values, **kwargs)
 
     plot_height = ax.get_ylim()[1]
@@ -147,26 +200,27 @@ def set_key_if_doesnt_exist(d, key, value):
     if rope is not None:
         display_rope(rope)
 
+
 def scale_text(figsize, text_size):
-        """Scale text to figsize."""
+    """Scale text to figsize."""
 
-        if text_size is None and figsize is not None:
-            if figsize[0] <= 11:
-                return 12
-            else:
-                return figsize[0]
+    if text_size is None and figsize is not None:
+        if figsize[0] <= 11:
+            return 12
         else:
-            return text_size
+            return figsize[0]
+    else:
+        return text_size
+
 
 def get_trace_dict(tr, varnames):
-        traces = OrderedDict()
-        for v in varnames:
-            vals = tr.get_values(v, combine=True, squeeze=True)
-            if vals.ndim > 1:
-                vals_flat = vals.reshape(vals.shape[0], -1).T
-                for i, vi in enumerate(vals_flat):
-                    traces['_'.join([v, str(i)])] = vi
-            else:
-                traces[v] = vals
-        return traces
-        
\ No newline at end of file
+    traces = OrderedDict()
+    for v in varnames:
+        vals = tr.get_values(v, combine=True, squeeze=True)
+        if vals.ndim > 1:
+            vals_flat = vals.reshape(vals.shape[0], -1).T
+            for i, vi in enumerate(vals_flat):
+                traces["_".join([v, str(i)])] = vi
+        else:
+            traces[v] = vals
+    return traces
diff --git a/pymc3/plots/autocorrplot.py b/pymc3/plots/autocorrplot.py
index 0c9d053a52..d390f0e110 100644
--- a/pymc3/plots/autocorrplot.py
+++ b/pymc3/plots/autocorrplot.py
@@ -9,8 +9,16 @@
 from .utils import get_default_varnames, get_axis
 
 
-def autocorrplot(trace, varnames=None, max_lag=100, burn=0, plot_transformed=False,
-                 symmetric_plot=False, ax=None, figsize=None):
+def autocorrplot(
+    trace,
+    varnames=None,
+    max_lag=100,
+    burn=0,
+    plot_transformed=False,
+    symmetric_plot=False,
+    ax=None,
+    figsize=None,
+):
     """Bar plot of the autocorrelation function for a trace.
 
     Parameters
@@ -39,39 +47,53 @@ def autocorrplot(trace, varnames=None, max_lag=100, burn=0, plot_transformed=Fal
     -------
     ax : matplotlib axes
     """
+
     def _handle_array_varnames(varname):
         if trace[0][varname].__class__ is np.ndarray:
             k = trace[varname].shape[1]
             for i in range(k):
-                yield varname + '_{0}'.format(i)
+                yield varname + "_{0}".format(i)
         else:
             yield varname
 
     if varnames is None:
         varnames = get_default_varnames(trace.varnames, plot_transformed)
 
-    varnames = list(itertools.chain.from_iterable(map(_handle_array_varnames, varnames)))
+    varnames = list(
+        itertools.chain.from_iterable(map(_handle_array_varnames, varnames))
+    )
 
     nchains = trace.nchains
 
     if figsize is None:
         figsize = (12, len(varnames) * 2)
 
-    ax = get_axis(ax, len(varnames), nchains,
-                  squeeze=False, sharex=True, sharey=True, figsize=figsize)
+    ax = get_axis(
+        ax,
+        len(varnames),
+        nchains,
+        squeeze=False,
+        sharex=True,
+        sharey=True,
+        figsize=figsize,
+    )
 
     max_lag = min(len(trace) - 1, max_lag)
 
     for i, v in enumerate(varnames):
         for j, chain in enumerate(trace.chains):
             try:
-                d = np.squeeze(trace.get_values(v, chains=[chain], burn=burn,
-                                                combine=False))
+                d = np.squeeze(
+                    trace.get_values(v, chains=[chain], burn=burn, combine=False)
+                )
             except KeyError:
-                k = int(v.split('_')[-1])
-                v_use = '_'.join(v.split('_')[:-1])
-                d = np.squeeze(trace.get_values(v_use, chains=[chain],
-                                                burn=burn, combine=False)[:, k])
+                k = int(v.split("_")[-1])
+                v_use = "_".join(v.split("_")[:-1])
+                d = np.squeeze(
+                    trace.get_values(v_use, chains=[chain], burn=burn, combine=False)[
+                        :, k
+                    ]
+                )
 
             ax[i, j].acorr(d, detrend=plt.mlab.detrend_mean, maxlags=max_lag)
 
diff --git a/pymc3/plots/compareplot.py b/pymc3/plots/compareplot.py
index d6f31fdcfd..3542c456a6 100644
--- a/pymc3/plots/compareplot.py
+++ b/pymc3/plots/compareplot.py
@@ -1,12 +1,14 @@
 import numpy as np
+
 try:
     import matplotlib.pyplot as plt
 except ImportError:  # mpl is optional
     pass
 
 
-def compareplot(comp_df, insample_dev=True, se=True, dse=True, ax=None,
-                plot_kwargs=None):
+def compareplot(
+    comp_df, insample_dev=True, se=True, dse=True, ax=None, plot_kwargs=None
+):
     """
     Model comparison summary plot in the style of the one used in the book
     Statistical Rethinking by Richard McElreath.
@@ -44,59 +46,68 @@ def compareplot(comp_df, insample_dev=True, se=True, dse=True, ax=None,
     if plot_kwargs is None:
         plot_kwargs = {}
 
-    yticks_pos, step = np.linspace(0, -1, (comp_df.shape[0] * 2) - 1,
-                                   retstep=True)
+    yticks_pos, step = np.linspace(0, -1, (comp_df.shape[0] * 2) - 1, retstep=True)
     yticks_pos[1::2] = yticks_pos[1::2] + step / 2
 
-    yticks_labels = [''] * len(yticks_pos)
-    
-    ic = 'WAIC'
+    yticks_labels = [""] * len(yticks_pos)
+
+    ic = "WAIC"
     if ic not in comp_df.columns:
-        ic = 'LOO'
+        ic = "LOO"
 
     if dse:
         yticks_labels[0] = comp_df.index[0]
         yticks_labels[2::2] = comp_df.index[1:]
         ax.set_yticks(yticks_pos)
-        ax.errorbar(x=comp_df[ic].iloc[1:],
-                    y=yticks_pos[1::2],
-                    xerr=comp_df.dSE[1:],
-                    color=plot_kwargs.get('color_dse', 'grey'),
-                    fmt=plot_kwargs.get('marker_dse', '^'))
+        ax.errorbar(
+            x=comp_df[ic].iloc[1:],
+            y=yticks_pos[1::2],
+            xerr=comp_df.dSE[1:],
+            color=plot_kwargs.get("color_dse", "grey"),
+            fmt=plot_kwargs.get("marker_dse", "^"),
+        )
 
     else:
         yticks_labels = comp_df.index
         ax.set_yticks(yticks_pos[::2])
 
     if se:
-        ax.errorbar(x=comp_df[ic],
-                    y=yticks_pos[::2],
-                    xerr=comp_df.SE,
-                    color=plot_kwargs.get('color_ic', 'k'),
-                    fmt=plot_kwargs.get('marker_ic', 'o'),
-                    mfc='None',
-                    mew=1)
+        ax.errorbar(
+            x=comp_df[ic],
+            y=yticks_pos[::2],
+            xerr=comp_df.SE,
+            color=plot_kwargs.get("color_ic", "k"),
+            fmt=plot_kwargs.get("marker_ic", "o"),
+            mfc="None",
+            mew=1,
+        )
     else:
-        ax.plot(comp_df[ic],
-                yticks_pos[::2],
-                color=plot_kwargs.get('color_ic', 'k'),
-                marker=plot_kwargs.get('marker_ic', 'o'),
-                mfc='None',
-                mew=1,
-                lw=0)
+        ax.plot(
+            comp_df[ic],
+            yticks_pos[::2],
+            color=plot_kwargs.get("color_ic", "k"),
+            marker=plot_kwargs.get("marker_ic", "o"),
+            mfc="None",
+            mew=1,
+            lw=0,
+        )
 
     if insample_dev:
-        ax.plot(comp_df[ic] - (2 * comp_df['p'+ic]),
-                yticks_pos[::2],
-                color=plot_kwargs.get('color_insample_dev', 'k'),
-                marker=plot_kwargs.get('marker_insample_dev', 'o'),
-                lw=0)
-
-    ax.axvline(comp_df[ic].iloc[0],
-               ls=plot_kwargs.get('ls_min_ic', '--'),
-               color=plot_kwargs.get('color_ls_min_ic', 'grey'))
-
-    ax.set_xlabel('Deviance', fontsize=plot_kwargs.get('fontsize', 14))
+        ax.plot(
+            comp_df[ic] - (2 * comp_df["p" + ic]),
+            yticks_pos[::2],
+            color=plot_kwargs.get("color_insample_dev", "k"),
+            marker=plot_kwargs.get("marker_insample_dev", "o"),
+            lw=0,
+        )
+
+    ax.axvline(
+        comp_df[ic].iloc[0],
+        ls=plot_kwargs.get("ls_min_ic", "--"),
+        color=plot_kwargs.get("color_ls_min_ic", "grey"),
+    )
+
+    ax.set_xlabel("Deviance", fontsize=plot_kwargs.get("fontsize", 14))
     ax.set_yticklabels(yticks_labels)
     ax.set_ylim(-1 + step, 0 - step)
 
diff --git a/pymc3/plots/densityplot.py b/pymc3/plots/densityplot.py
index 74d91cdee3..839880d8e7 100644
--- a/pymc3/plots/densityplot.py
+++ b/pymc3/plots/densityplot.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 try:
     import matplotlib.pyplot as plt
 except ImportError:  # mpl is optional
@@ -8,9 +9,22 @@
 from ..stats import hpd
 
 
-def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='mean',
-                colors='cycle', outline=True, hpd_markers='', shade=0., bw=4.5, figsize=None,
-                textsize=12, plot_transformed=False, ax=None):
+def densityplot(
+    trace,
+    models=None,
+    varnames=None,
+    alpha=0.05,
+    point_estimate="mean",
+    colors="cycle",
+    outline=True,
+    hpd_markers="",
+    shade=0.0,
+    bw=4.5,
+    figsize=None,
+    textsize=12,
+    plot_transformed=False,
+    ax=None,
+):
     """
     Generates KDE plots for continuous variables and histograms for discretes ones.
     Plots are truncated at their 100*(1-alpha)% credible intervals. Plots are grouped
@@ -64,7 +78,7 @@ def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='m
     ax : Matplotlib axes
 
     """
-    if point_estimate not in ('mean', 'median', None):
+    if point_estimate not in ("mean", "median", None):
         raise ValueError("Point estimate should be 'mean', 'median' or None")
 
     if not isinstance(trace, (list, tuple)):
@@ -74,17 +88,18 @@ def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='m
 
     if models is None:
         if length_trace > 1:
-            models = ['m_{}'.format(i) for i in range(length_trace)]
+            models = ["m_{}".format(i) for i in range(length_trace)]
         else:
-            models = ['']
+            models = [""]
     elif len(models) != length_trace:
         raise ValueError(
-            "The number of names for the models does not match the number of models")
+            "The number of names for the models does not match the number of models"
+        )
 
     length_models = len(models)
 
-    if colors == 'cycle':
-        colors = ['C{}'.format(i % 10) for i in range(length_models)]
+    if colors == "cycle":
+        colors = ["C{}".format(i % 10) for i in range(length_models)]
     elif isinstance(colors, str):
         colors = [colors for i in range(length_models)]
 
@@ -110,12 +125,32 @@ def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='m
                 if k > 1:
                     vec = np.split(vec.T.ravel(), k)
                     for i in range(k):
-                        _d_helper(vec[i], vname, colors[t_idx], bw, alpha, point_estimate,
-                                  hpd_markers, outline, shade, dplot[v_idx])
+                        _d_helper(
+                            vec[i],
+                            vname,
+                            colors[t_idx],
+                            bw,
+                            alpha,
+                            point_estimate,
+                            hpd_markers,
+                            outline,
+                            shade,
+                            dplot[v_idx],
+                        )
 
                 else:
-                    _d_helper(vec, vname, colors[t_idx], bw, alpha, point_estimate,
-                              hpd_markers, outline, shade, dplot[v_idx])
+                    _d_helper(
+                        vec,
+                        vname,
+                        colors[t_idx],
+                        bw,
+                        alpha,
+                        point_estimate,
+                        hpd_markers,
+                        outline,
+                        shade,
+                        dplot[v_idx],
+                    )
 
     if length_trace > 1:
         for m_idx, m in enumerate(models):
@@ -127,7 +162,9 @@ def densityplot(trace, models=None, varnames=None, alpha=0.05, point_estimate='m
     return dplot
 
 
-def _d_helper(vec, vname, c, bw, alpha, point_estimate, hpd_markers, outline, shade, ax):
+def _d_helper(
+    vec, vname, c, bw, alpha, point_estimate, hpd_markers, outline, shade, ax
+):
     """
     vec : array
         1D array from trace
@@ -148,7 +185,7 @@ def _d_helper(vec, vname, c, bw, alpha, point_estimate, hpd_markers, outline, sh
         (opaque). Defaults to 0.
     ax : matplotlib axes
     """
-    if vec.dtype.kind == 'f':
+    if vec.dtype.kind == "f":
         density, l, u = fast_kde(vec)
         x = np.linspace(l, u, len(density))
         hpd_ = hpd(vec, alpha)
@@ -161,31 +198,31 @@ def _d_helper(vec, vname, c, bw, alpha, point_estimate, hpd_markers, outline, sh
 
         if outline:
             ax.plot(x[cut], density[cut], color=c)
-            ax.plot([xmin, xmin], [-ymin/100, ymin], color=c, ls='-')
-            ax.plot([xmax, xmax], [-ymax/100, ymax], color=c, ls='-')
+            ax.plot([xmin, xmin], [-ymin / 100, ymin], color=c, ls="-")
+            ax.plot([xmax, xmax], [-ymax / 100, ymax], color=c, ls="-")
 
         if shade:
             ax.fill_between(x, density, where=cut, color=c, alpha=shade)
 
     else:
         xmin, xmax = hpd(vec, alpha)
-        bins = range(xmin, xmax+1)
+        bins = range(xmin, xmax + 1)
         if outline:
-            ax.hist(vec, bins=bins, color=c, histtype='step')
+            ax.hist(vec, bins=bins, color=c, histtype="step")
         ax.hist(vec, bins=bins, color=c, alpha=shade)
 
     if hpd_markers:
-        ax.plot(xmin, 0, hpd_markers, color=c, markeredgecolor='k')
-        ax.plot(xmax, 0, hpd_markers, color=c, markeredgecolor='k')
+        ax.plot(xmin, 0, hpd_markers, color=c, markeredgecolor="k")
+        ax.plot(xmax, 0, hpd_markers, color=c, markeredgecolor="k")
 
     if point_estimate is not None:
-        if point_estimate == 'mean':
+        if point_estimate == "mean":
             ps = np.mean(vec)
-        elif point_estimate == 'median':
+        elif point_estimate == "median":
             ps = np.median(vec)
-        ax.plot(ps, -0.001, 'o', color=c, markeredgecolor='k')
+        ax.plot(ps, -0.001, "o", color=c, markeredgecolor="k")
 
     ax.set_yticks([])
     ax.set_title(vname)
-    for pos in ['left', 'right', 'top']:
+    for pos in ["left", "right", "top"]:
         ax.spines[pos].set_visible(0)
diff --git a/pymc3/plots/energyplot.py b/pymc3/plots/energyplot.py
index 5fbba176ce..e6a44ee52f 100644
--- a/pymc3/plots/energyplot.py
+++ b/pymc3/plots/energyplot.py
@@ -1,6 +1,7 @@
 import warnings
 
 import numpy as np
+
 try:
     import matplotlib.pyplot as plt
 except ImportError:  # mpl is optional
@@ -8,8 +9,18 @@
 from .kdeplot import kdeplot
 
 
-def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, shade=0.35, bw=4.5,
-               frame=True, kwargs_shade=None, **kwargs):
+def energyplot(
+    trace,
+    kind="kde",
+    figsize=None,
+    ax=None,
+    legend=True,
+    shade=0.35,
+    bw=4.5,
+    frame=True,
+    kwargs_shade=None,
+    **kwargs
+):
     """Plot energy transition distribution and marginal energy distribution in
     order to diagnose poor exploration by HMC algorithms.
 
@@ -46,13 +57,15 @@ def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, shade=0.35
         _, ax = plt.subplots(figsize=figsize)
 
     try:
-        energy = trace['energy']
+        energy = trace["energy"]
     except KeyError:
-        warnings.warn('There is no energy information in the passed trace.')
+        warnings.warn("There is no energy information in the passed trace.")
         return ax
 
-    series = [('Marginal energy distribution', energy - energy.mean()),
-              ('Energy transition distribution', np.diff(energy))]
+    series = [
+        ("Marginal energy distribution", energy - energy.mean()),
+        ("Energy transition distribution", np.diff(energy)),
+    ]
 
     if figsize is None:
         figsize = (8, 6)
@@ -60,17 +73,24 @@ def energyplot(trace, kind='kde', figsize=None, ax=None, legend=True, shade=0.35
     if kwargs_shade is None:
         kwargs_shade = {}
 
-    if kind == 'kde':
+    if kind == "kde":
         for label, value in series:
-            kdeplot(value, label=label, shade=shade, bw=bw, ax=ax, kwargs_shade=kwargs_shade,
-                    **kwargs)
-
-    elif kind == 'hist':
+            kdeplot(
+                value,
+                label=label,
+                shade=shade,
+                bw=bw,
+                ax=ax,
+                kwargs_shade=kwargs_shade,
+                **kwargs
+            )
+
+    elif kind == "hist":
         for label, value in series:
             ax.hist(value, alpha=shade, label=label, **kwargs)
 
     else:
-        raise ValueError('Plot type {} not recognized.'.format(kind))
+        raise ValueError("Plot type {} not recognized.".format(kind))
 
     ax.set_xticks([])
     ax.set_yticks([])
diff --git a/pymc3/plots/forestplot.py b/pymc3/plots/forestplot.py
index bf103903ff..aced43a213 100644
--- a/pymc3/plots/forestplot.py
+++ b/pymc3/plots/forestplot.py
@@ -8,6 +8,7 @@
 from pymc3.stats import quantiles, hpd, dict2pd
 from .utils import identity_transform, get_default_varnames
 
+
 def _var_str(name, shape):
     """Return a sequence of strings naming the element of the tallyable object.
 
@@ -17,8 +18,8 @@ def _var_str(name, shape):
     """
     size = np.prod(shape)
     ind = (np.indices(shape)).reshape(-1, size)
-    names = ['[' + ','.join(map(str, i)) + ']' for i in zip(*ind)]
-    names[0] = '%s %s' % (name, names[0])
+    names = ["[" + ",".join(map(str, i)) + "]" for i in zip(*ind)]
+    names[0] = "%s %s" % (name, names[0])
     return names
 
 
@@ -44,31 +45,61 @@ def _plot_tree(ax, y, ntiles, show_quartiles, c, plot_kwargs):
     """
     if show_quartiles:
         # Plot median
-        ax.plot(ntiles[2], y, color=c,
-                marker=plot_kwargs.get('marker', 'o'),
-                markersize=plot_kwargs.get('markersize', 4))
+        ax.plot(
+            ntiles[2],
+            y,
+            color=c,
+            marker=plot_kwargs.get("marker", "o"),
+            markersize=plot_kwargs.get("markersize", 4),
+        )
         # Plot quartile interval
-        ax.errorbar(x=(ntiles[1], ntiles[3]), y=(y, y),
-                    linewidth=plot_kwargs.get('linewidth', 2),
-                    color=c)
+        ax.errorbar(
+            x=(ntiles[1], ntiles[3]),
+            y=(y, y),
+            linewidth=plot_kwargs.get("linewidth", 2),
+            color=c,
+        )
 
     else:
         # Plot median
-        ax.plot(ntiles[1], y, marker=plot_kwargs.get('marker', 'o'),
-                color=c, markersize=plot_kwargs.get('markersize', 4))
+        ax.plot(
+            ntiles[1],
+            y,
+            marker=plot_kwargs.get("marker", "o"),
+            color=c,
+            markersize=plot_kwargs.get("markersize", 4),
+        )
 
     # Plot outer interval
-    ax.errorbar(x=(ntiles[0], ntiles[-1]), y=(y, y),
-                linewidth=int(plot_kwargs.get('linewidth', 2)/2),
-                color=c)
+    ax.errorbar(
+        x=(ntiles[0], ntiles[-1]),
+        y=(y, y),
+        linewidth=int(plot_kwargs.get("linewidth", 2) / 2),
+        color=c,
+    )
 
     return ax
 
 
-def forestplot(trace, models=None, varnames=None, transform=identity_transform,
-               alpha=0.05, quartiles=True, rhat=True, main=None, xtitle=None,
-               xlim=None, ylabels=None, colors='C0', chain_spacing=0.1, vline=0,
-               gs=None, plot_transformed=False, plot_kwargs=None):
+def forestplot(
+    trace,
+    models=None,
+    varnames=None,
+    transform=identity_transform,
+    alpha=0.05,
+    quartiles=True,
+    rhat=True,
+    main=None,
+    xtitle=None,
+    xlim=None,
+    ylabels=None,
+    colors="C0",
+    chain_spacing=0.1,
+    vline=0,
+    gs=None,
+    plot_transformed=False,
+    plot_kwargs=None,
+):
     """
     Forest plot (model summary plot).
 
@@ -139,15 +170,16 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
 
     if models is None:
         if len(trace) > 1:
-            models = ['m_{}'.format(i) for i in range(len(trace))]
+            models = ["m_{}".format(i) for i in range(len(trace))]
         else:
-            models = ['']
+            models = [""]
     elif len(models) != len(trace):
-        raise ValueError("The number of names for the models does not match "
-                         "the number of models")
+        raise ValueError(
+            "The number of names for the models does not match " "the number of models"
+        )
 
-    if colors == 'cycle':
-        colors = ['C{}'.format(i % 10) for i in range(len(models))]
+    if colors == "cycle":
+        colors = ["C{}".format(i % 10) for i in range(len(models))]
     elif isinstance(colors, str):
         colors = [colors for i in range(len(models))]
 
@@ -177,7 +209,7 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
             gr_plot.set_xticks((1.0, 1.5, 2.0), ("1", "1.5", "2+"))
             gr_plot.set_xlim(0.9, 2.1)
             gr_plot.set_yticks([])
-            gr_plot.set_title('R-hat')
+            gr_plot.set_title("R-hat")
         else:
             gs = gridspec.GridSpec(1, 1)
 
@@ -187,10 +219,8 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
     trace_quantiles = []
     hpd_intervals = []
     for tr in trace:
-        trace_quantiles.append(quantiles(tr, qlist, transform=transform,
-                                         squeeze=False))
-        hpd_intervals.append(hpd(tr, alpha, transform=transform,
-                                 squeeze=False))
+        trace_quantiles.append(quantiles(tr, qlist, transform=transform, squeeze=False))
+        hpd_intervals.append(hpd(tr, alpha, transform=transform, squeeze=False))
 
     labels = []
     var = 0
@@ -200,7 +230,7 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
     for v_idx, v in enumerate(varnames):
         for h, tr in enumerate(trace):
             if v not in tr.varnames:
-                labels.append(models[h] + ' ' + v)
+                labels.append(models[h] + " " + v)
                 var += 1
             else:
                 for j, chain in enumerate(tr.chains):
@@ -224,17 +254,19 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
                     if j == 0:
                         if k > 1:
                             names = _var_str(v, np.shape(value))
-                            names[0] = models[h] + ' ' + names[0]
+                            names[0] = models[h] + " " + names[0]
                             labels += names
                         else:
-                            labels.append(models[h] + ' ' + v)
+                            labels.append(models[h] + " " + v)
 
                     # Add spacing for each chain, if more than one
-                    offset = [0] + [(chain_spacing * ((i + 2) / 2)) *
-                                    (-1) ** i for i in range(nchains[h] - 1)]
+                    offset = [0] + [
+                        (chain_spacing * ((i + 2) / 2)) * (-1) ** i
+                        for i in range(nchains[h] - 1)
+                    ]
 
                     # Y coordinate with offset
-                    y = - var + offset[j]
+                    y = -var + offset[j]
 
                     # Deal with multivariate nodes
 
@@ -242,34 +274,41 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
                         qs = np.moveaxis(np.array(quants), 0, -1).squeeze()
                         for q in qs.reshape(-1, len(quants)):
                             # Multiple y values
-                            interval_plot = _plot_tree(interval_plot, y, q,
-                                                       quartiles, colors[h],
-                                                       plot_kwargs)
+                            interval_plot = _plot_tree(
+                                interval_plot, y, q, quartiles, colors[h], plot_kwargs
+                            )
                             y -= 1
                     else:
-                        interval_plot = _plot_tree(interval_plot, y, quants,
-                                                   quartiles, colors[h],
-                                                   plot_kwargs)
+                        interval_plot = _plot_tree(
+                            interval_plot, y, quants, quartiles, colors[h], plot_kwargs
+                        )
 
                 # Genenerate Gelman-Rubin plot
                 if plot_rhat[h] and v in tr.varnames:
                     R = gelman_rubin(tr, [v])
                     if k > 1:
-                        Rval = dict2pd(R, 'rhat').values
-                        gr_plot.plot([min(r, 2) for r in Rval],
-                                     [-(j + var) for j in range(k)], 'o',
-                                     color=colors[h], markersize=4)
+                        Rval = dict2pd(R, "rhat").values
+                        gr_plot.plot(
+                            [min(r, 2) for r in Rval],
+                            [-(j + var) for j in range(k)],
+                            "o",
+                            color=colors[h],
+                            markersize=4,
+                        )
                     else:
-                        gr_plot.plot(min(R[v], 2), -var, 'o', color=colors[h],
-                                     markersize=4)
+                        gr_plot.plot(
+                            min(R[v], 2), -var, "o", color=colors[h], markersize=4
+                        )
                 var += k
 
         if len(trace) > 1:
-            interval_plot.axhspan(var_old, y - chain_spacing - 0.5,
-                                  facecolor='k', alpha=bands[v_idx])
+            interval_plot.axhspan(
+                var_old, y - chain_spacing - 0.5, facecolor="k", alpha=bands[v_idx]
+            )
             if np.any(plot_rhat):
-                gr_plot.axhspan(var_old, y - chain_spacing - 0.5,
-                                facecolor='k', alpha=bands[v_idx])
+                gr_plot.axhspan(
+                    var_old, y - chain_spacing - 0.5, facecolor="k", alpha=bands[v_idx]
+                )
             var_old = y - chain_spacing - 0.5
 
     if ylabels is not None:
@@ -280,19 +319,19 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
     gs.update(left=left_margin, right=0.95, top=0.9, bottom=0.05)
 
     # Define range of y-axis for forestplot and R-hat
-    interval_plot.set_ylim(- var + 0.5, 0.5)
+    interval_plot.set_ylim(-var + 0.5, 0.5)
     if np.any(plot_rhat):
-        gr_plot.set_ylim(- var + 0.5, 0.5)
+        gr_plot.set_ylim(-var + 0.5, 0.5)
 
     plotrange = [np.min(all_quants), np.max(all_quants)]
     datarange = plotrange[1] - plotrange[0]
-    interval_plot.set_xlim(plotrange[0] - 0.05 * datarange,
-                           plotrange[1] + 0.05 * datarange)
+    interval_plot.set_xlim(
+        plotrange[0] - 0.05 * datarange, plotrange[1] + 0.05 * datarange
+    )
 
     # Add variable labels
-    interval_plot.set_yticks([- l for l in range(len(labels))])
-    interval_plot.set_yticklabels(labels,
-                                  fontsize=plot_kwargs.get('fontsize', None))
+    interval_plot.set_yticks([-l for l in range(len(labels))])
+    interval_plot.set_yticklabels(labels, fontsize=plot_kwargs.get("fontsize", None))
 
     # Add title
     if main is None:
@@ -302,8 +341,7 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
     else:
         plot_title = ""
 
-    interval_plot.set_title(plot_title,
-                            fontsize=plot_kwargs.get('fontsize', None))
+    interval_plot.set_title(plot_title, fontsize=plot_kwargs.get("fontsize", None))
 
     # Add x-axis label
     if xtitle is not None:
@@ -319,10 +357,10 @@ def forestplot(trace, models=None, varnames=None, transform=identity_transform,
         ticks.tick2On = False
 
     for loc, spine in interval_plot.spines.items():
-        if loc in ['left', 'right']:
-            spine.set_color('none')  # don't draw spine
+        if loc in ["left", "right"]:
+            spine.set_color("none")  # don't draw spine
 
     # Reference line
-    interval_plot.axvline(vline, color='k', linestyle=':')
+    interval_plot.axvline(vline, color="k", linestyle=":")
 
     return gs
diff --git a/pymc3/plots/kdeplot.py b/pymc3/plots/kdeplot.py
index be8fa48c1e..ac79672d2a 100644
--- a/pymc3/plots/kdeplot.py
+++ b/pymc3/plots/kdeplot.py
@@ -78,7 +78,7 @@ def fast_kde(x, bw=4.5):
     dx = (xmax - xmin) / (nx - 1)
     std_x = entropy((x - xmin) / dx) * bw
     if ~np.isfinite(std_x):
-        std_x = 0.
+        std_x = 0.0
     grid, _ = np.histogram(x, bins=nx)
 
     scotts_factor = n ** (-0.2)
@@ -86,8 +86,8 @@ def fast_kde(x, bw=4.5):
     kernel = gaussian(kern_nx, scotts_factor * std_x)
 
     npad = min(nx, 2 * kern_nx)
-    grid = np.concatenate([grid[npad: 0: -1], grid, grid[nx: nx - npad: -1]])
-    density = convolve(grid, kernel, mode='same')[npad: npad + nx]
+    grid = np.concatenate([grid[npad:0:-1], grid, grid[nx : nx - npad : -1]])
+    density = convolve(grid, kernel, mode="same")[npad : npad + nx]
 
     norm_factor = n * dx * (2 * np.pi * std_x ** 2 * scotts_factor ** 2) ** 0.5
 
diff --git a/pymc3/plots/pairplot.py b/pymc3/plots/pairplot.py
index 516e1117f2..9c5669a0ca 100644
--- a/pymc3/plots/pairplot.py
+++ b/pymc3/plots/pairplot.py
@@ -9,10 +9,20 @@
 from .artists import get_trace_dict, scale_text
 
 
-def pairplot(trace, varnames=None, figsize=None, text_size=None,
-             gs=None, ax=None, hexbin=False, plot_transformed=False,
-             divergences=False, kwargs_divergence=None,
-             sub_varnames=None, **kwargs):
+def pairplot(
+    trace,
+    varnames=None,
+    figsize=None,
+    text_size=None,
+    gs=None,
+    ax=None,
+    hexbin=False,
+    plot_transformed=False,
+    divergences=False,
+    kwargs_divergence=None,
+    sub_varnames=None,
+    **kwargs
+):
     """
     Plot a scatter or hexbin matrix of the sampled parameters.
 
@@ -55,8 +65,11 @@ def pairplot(trace, varnames=None, figsize=None, text_size=None,
         if plot_transformed:
 
             varnames_copy = list(trace.varnames)
-            remove = [get_untransformed_name(var) for var in trace.varnames
-                      if is_transformed_name(var)]
+            remove = [
+                get_untransformed_name(var)
+                for var in trace.varnames
+                if is_transformed_name(var)
+            ]
 
             try:
                 [varnames_copy.remove(i) for i in remove]
@@ -65,21 +78,21 @@ def pairplot(trace, varnames=None, figsize=None, text_size=None,
                 varnames = varnames_copy
 
             trace_dict = get_trace_dict(
-                trace, get_default_varnames(
-                    varnames, plot_transformed))
+                trace, get_default_varnames(varnames, plot_transformed)
+            )
 
         else:
             trace_dict = get_trace_dict(
-                trace, get_default_varnames(
-                    trace.varnames, plot_transformed))
+                trace, get_default_varnames(trace.varnames, plot_transformed)
+            )
 
         if sub_varnames is None:
             varnames = list(trace_dict.keys())
 
         else:
             trace_dict = get_trace_dict(
-                trace, get_default_varnames(
-                    trace.varnames, True))
+                trace, get_default_varnames(trace.varnames, True)
+            )
             varnames = sub_varnames
 
     else:
@@ -98,30 +111,30 @@ def pairplot(trace, varnames=None, figsize=None, text_size=None,
         figsize = (8 + numvars, 8 + numvars)
 
     if numvars < 2:
-        raise Exception(
-            'Number of variables to be plotted must be 2 or greater.')
+        raise Exception("Number of variables to be plotted must be 2 or greater.")
 
     if numvars == 2 and ax is not None:
         if hexbin:
-            ax.hexbin(trace_dict[varnames[0]],
-                      trace_dict[varnames[1]], mincnt=1, **kwargs)
+            ax.hexbin(
+                trace_dict[varnames[0]], trace_dict[varnames[1]], mincnt=1, **kwargs
+            )
         else:
-            ax.scatter(trace_dict[varnames[0]],
-                       trace_dict[varnames[1]], **kwargs)
+            ax.scatter(trace_dict[varnames[0]], trace_dict[varnames[1]], **kwargs)
 
         if divergences:
             try:
-                divergent = trace['diverging']
+                divergent = trace["diverging"]
             except KeyError:
-                warnings.warn('No divergences were found.')
-
-            diverge = (divergent == 1)
-            ax.scatter(trace_dict[varnames[0]][diverge],
-                       trace_dict[varnames[1]][diverge], **kwargs_divergence)
-        ax.set_xlabel('{}'.format(varnames[0]),
-                      fontsize=text_size)
-        ax.set_ylabel('{}'.format(
-            varnames[1]), fontsize=text_size)
+                warnings.warn("No divergences were found.")
+
+            diverge = divergent == 1
+            ax.scatter(
+                trace_dict[varnames[0]][diverge],
+                trace_dict[varnames[1]][diverge],
+                **kwargs_divergence
+            )
+        ax.set_xlabel("{}".format(varnames[0]), fontsize=text_size)
+        ax.set_ylabel("{}".format(varnames[1]), fontsize=text_size)
         ax.tick_params(labelsize=text_size)
 
     if gs is None and ax is None:
@@ -143,26 +156,22 @@ def pairplot(trace, varnames=None, figsize=None, text_size=None,
 
                 if divergences:
                     try:
-                        divergent = trace['diverging']
+                        divergent = trace["diverging"]
                     except KeyError:
-                        warnings.warn('No divergences were found.')
+                        warnings.warn("No divergences were found.")
                         return ax
 
-                    diverge = (divergent == 1)
-                    ax.scatter(var1[diverge],
-                               var2[diverge],
-                               **kwargs_divergence)
+                    diverge = divergent == 1
+                    ax.scatter(var1[diverge], var2[diverge], **kwargs_divergence)
 
                 if j + 1 != numvars - 1:
                     ax.set_xticks([])
                 else:
-                    ax.set_xlabel('{}'.format(varnames[i]),
-                                  fontsize=text_size)
+                    ax.set_xlabel("{}".format(varnames[i]), fontsize=text_size)
                 if i != 0:
                     ax.set_yticks([])
                 else:
-                    ax.set_ylabel('{}'.format(
-                        varnames[j + 1]), fontsize=text_size)
+                    ax.set_ylabel("{}".format(varnames[j + 1]), fontsize=text_size)
 
                 ax.tick_params(labelsize=text_size)
 
diff --git a/pymc3/plots/posteriorplot.py b/pymc3/plots/posteriorplot.py
index d2ae8e5151..d3da5dad6e 100644
--- a/pymc3/plots/posteriorplot.py
+++ b/pymc3/plots/posteriorplot.py
@@ -9,9 +9,23 @@
 from .utils import identity_transform, get_default_varnames
 
 
-def plot_posterior(trace, varnames=None, transform=identity_transform, figsize=None, text_size=None,
-                   alpha_level=0.05, round_to=3, point_estimate='mean', rope=None,
-                   ref_val=None, kde_plot=False, plot_transformed=False, bw=4.5, ax=None, **kwargs):
+def plot_posterior(
+    trace,
+    varnames=None,
+    transform=identity_transform,
+    figsize=None,
+    text_size=None,
+    alpha_level=0.05,
+    round_to=3,
+    point_estimate="mean",
+    rope=None,
+    ref_val=None,
+    kde_plot=False,
+    plot_transformed=False,
+    bw=4.5,
+    ax=None,
+    **kwargs
+):
     """Plot Posterior densities in style of John K. Kruschke book.
 
     Parameters
@@ -83,10 +97,19 @@ def create_axes_grid(figsize, traces):
         if ax is None:
             fig, ax = plt.subplots(figsize=figsize)
 
-
-        plot_posterior_op(transform(trace), ax=ax, bw=bw, kde_plot=kde_plot,
-                          point_estimate=point_estimate, round_to=round_to, alpha_level=alpha_level,
-                          ref_val=ref_val, rope=rope, text_size=scale_text(figsize, text_size), **kwargs)
+        plot_posterior_op(
+            transform(trace),
+            ax=ax,
+            bw=bw,
+            kde_plot=kde_plot,
+            point_estimate=point_estimate,
+            round_to=round_to,
+            alpha_level=alpha_level,
+            ref_val=ref_val,
+            rope=rope,
+            text_size=scale_text(figsize, text_size),
+            **kwargs
+        )
 
     else:
         if varnames is None:
@@ -110,10 +133,19 @@ def create_axes_grid(figsize, traces):
 
         for idx, (a, v) in enumerate(zip(np.atleast_1d(ax), trace_dict)):
             tr_values = transform(trace_dict[v])
-            plot_posterior_op(tr_values, ax=a, bw=bw, kde_plot=kde_plot,
-                              point_estimate=point_estimate, round_to=round_to,
-                              alpha_level=alpha_level, ref_val=ref_val[idx],
-                              rope=rope[idx], text_size=scale_text(figsize, text_size), **kwargs)
+            plot_posterior_op(
+                tr_values,
+                ax=a,
+                bw=bw,
+                kde_plot=kde_plot,
+                point_estimate=point_estimate,
+                round_to=round_to,
+                alpha_level=alpha_level,
+                ref_val=ref_val[idx],
+                rope=rope[idx],
+                text_size=scale_text(figsize, text_size),
+                **kwargs
+            )
             a.set_title(v, fontsize=scale_text(figsize, text_size))
 
         plt.tight_layout()
@@ -137,21 +169,21 @@ def plot_posterior_predictive_glm(trace, eval=None, lm=None, samples=30, **kwarg
     Additional keyword arguments are passed to pylab.plot().
     """
     if lm is None:
-        lm = lambda x, sample: sample['Intercept'] + sample['x'] * x
+        lm = lambda x, sample: sample["Intercept"] + sample["x"] * x
 
     if eval is None:
         eval = np.linspace(0, 1, 100)
 
     # Set default plotting arguments
-    if 'lw' not in kwargs and 'linewidth' not in kwargs:
-        kwargs['lw'] = .2
-    if 'c' not in kwargs and 'color' not in kwargs:
-        kwargs['c'] = 'k'
+    if "lw" not in kwargs and "linewidth" not in kwargs:
+        kwargs["lw"] = 0.2
+    if "c" not in kwargs and "color" not in kwargs:
+        kwargs["c"] = "k"
 
     for rand_loc in np.random.randint(0, len(trace), samples):
         rand_sample = trace[rand_loc]
         plt.plot(eval, lm(eval, rand_sample), **kwargs)
-    # Make sure to not plot label multiple times
-        kwargs.pop('label', None)
+        # Make sure to not plot label multiple times
+        kwargs.pop("label", None)
 
-    plt.title('Posterior predictive')
+    plt.title("Posterior predictive")
diff --git a/pymc3/plots/traceplot.py b/pymc3/plots/traceplot.py
index adbd2b732c..1f3b022f51 100644
--- a/pymc3/plots/traceplot.py
+++ b/pymc3/plots/traceplot.py
@@ -8,10 +8,26 @@
 from .utils import identity_transform, get_default_varnames, get_axis, make_2d
 
 
-def traceplot(trace, varnames=None, transform=identity_transform, figsize=None, lines=None,
-              combined=False, plot_transformed=False, grid=False, alpha=0.35, priors=None,
-              prior_alpha=1, prior_style='--', bw=4.5, ax=None, live_plot=False,
-              skip_first=0, refresh_every=100, roll_over=1000):
+def traceplot(
+    trace,
+    varnames=None,
+    transform=identity_transform,
+    figsize=None,
+    lines=None,
+    combined=False,
+    plot_transformed=False,
+    grid=False,
+    alpha=0.35,
+    priors=None,
+    prior_alpha=1,
+    prior_style="--",
+    bw=4.5,
+    ax=None,
+    live_plot=False,
+    skip_first=0,
+    refresh_every=100,
+    roll_over=1000,
+):
     """Plot samples histograms and values.
 
     Parameters
@@ -107,11 +123,13 @@ def traceplot(trace, varnames=None, transform=identity_transform, figsize=None,
                         x0 = len(d) - roll_over + skip_first
                     d_stream = d[-roll_over:]
             width = len(d_stream)
-            if d.dtype.kind == 'i':
+            if d.dtype.kind == "i":
                 hist_objs = histplot_op(ax[i, 0], d, alpha=alpha)
                 colors = [h[-1][0].get_facecolor() for h in hist_objs]
             else:
-                artists = kdeplot_op(ax[i, 0], d, bw, prior, prior_alpha, prior_style)[0]
+                artists = kdeplot_op(ax[i, 0], d, bw, prior, prior_alpha, prior_style)[
+                    0
+                ]
                 colors = [a[0].get_color() for a in artists]
             ax[i, 0].set_title(str(v))
             ax[i, 0].grid(grid)
@@ -124,17 +142,18 @@ def traceplot(trace, varnames=None, transform=identity_transform, figsize=None,
             if lines:
                 try:
                     if isinstance(lines[v], (float, int)):
-                        line_values, colors = [lines[v]], ['r']
+                        line_values, colors = [lines[v]], ["r"]
                     else:
                         line_values = np.atleast_1d(lines[v]).ravel()
                         if len(colors) != len(line_values):
-                            raise AssertionError("An incorrect number of lines was specified for "
-                                                 "'{}'. Expected an iterable of length {} or to "
-                                                 " a scalar".format(v, len(colors)))
+                            raise AssertionError(
+                                "An incorrect number of lines was specified for "
+                                "'{}'. Expected an iterable of length {} or to "
+                                " a scalar".format(v, len(colors))
+                            )
                     for c, l in zip(colors, line_values):
                         ax[i, 0].axvline(x=l, color=c, lw=1.5, alpha=0.75)
-                        ax[i, 1].axhline(y=l, color=c,
-                                         lw=1.5, alpha=alpha)
+                        ax[i, 1].axhline(y=l, color=c, lw=1.5, alpha=alpha)
                 except KeyError:
                     pass
         if live_plot:
diff --git a/pymc3/plots/utils.py b/pymc3/plots/utils.py
index 9b96931936..5fb443b688 100644
--- a/pymc3/plots/utils.py
+++ b/pymc3/plots/utils.py
@@ -3,8 +3,9 @@
 except ImportError:  # mpl is optional
     pass
 import numpy as np
+
 # plotting utilities can all be in this namespace
-from ..util import get_default_varnames # pylint: disable=unused-import
+from ..util import get_default_varnames  # pylint: disable=unused-import
 
 
 def identity_transform(x):
@@ -29,7 +30,7 @@ def get_axis(ax, default_rows, default_columns, **default_kwargs):
     if ax is None:
         _, ax = plt.subplots(*default_shape, **default_kwargs)
     elif ax.shape != default_shape:
-        raise ValueError('Subplots with shape %r required' % (default_shape,))
+        raise ValueError("Subplots with shape %r required" % (default_shape,))
     return ax
 
 
@@ -39,5 +40,5 @@ def make_2d(a):
     # flatten out dimensions beyond the first
     n = a.shape[0]
     newshape = np.product(a.shape[1:]).astype(int)
-    a = a.reshape((n, newshape), order='F')
+    a = a.reshape((n, newshape), order="F")
     return a
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index ac54def005..c0da49656b 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -14,10 +14,24 @@
 from .backends.ndarray import NDArray
 from .distributions.distribution import draw_values
 from .model import modelcontext, Point, all_continuous
-from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
-                           BinaryGibbsMetropolis, CategoricalGibbsMetropolis,
-                           Slice, CompoundStep, arraystep, smc)
-from .util import update_start_vals, get_untransformed_name, is_transformed_name, get_default_varnames
+from .step_methods import (
+    NUTS,
+    HamiltonianMC,
+    Metropolis,
+    BinaryMetropolis,
+    BinaryGibbsMetropolis,
+    CategoricalGibbsMetropolis,
+    Slice,
+    CompoundStep,
+    arraystep,
+    smc,
+)
+from .util import (
+    update_start_vals,
+    get_untransformed_name,
+    is_transformed_name,
+    get_default_varnames,
+)
 from .vartypes import discrete_types
 from pymc3.step_methods.hmc import quadpotential
 from pymc3 import plots
@@ -25,17 +39,32 @@
 from tqdm import tqdm
 
 import sys
+
 sys.setrecursionlimit(10000)
 
-__all__ = ['sample', 'iter_sample', 'sample_posterior_predictive',
-           'sample_posterior_predictive_w', 'init_nuts',
-           'sample_prior_predictive', 'sample_ppc', 'sample_ppc_w']
+__all__ = [
+    "sample",
+    "iter_sample",
+    "sample_posterior_predictive",
+    "sample_posterior_predictive_w",
+    "init_nuts",
+    "sample_prior_predictive",
+    "sample_ppc",
+    "sample_ppc_w",
+]
 
-STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
-                BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis)
+STEP_METHODS = (
+    NUTS,
+    HamiltonianMC,
+    Metropolis,
+    BinaryMetropolis,
+    BinaryGibbsMetropolis,
+    Slice,
+    CategoricalGibbsMetropolis,
+)
 
 
-_log = logging.getLogger('pymc3')
+_log = logging.getLogger("pymc3")
 
 
 def instantiate_steppers(model, steps, selected_steps, step_kwargs=None):
@@ -76,7 +105,7 @@ def instantiate_steppers(model, steps, selected_steps, step_kwargs=None):
 
     unused_args = set(step_kwargs).difference(used_keys)
     if unused_args:
-        raise ValueError('Unused step method arguments: %s' % unused_args)
+        raise ValueError("Unused step method arguments: %s" % unused_args)
 
     if len(steps) == 1:
         steps = steps[0]
@@ -84,8 +113,7 @@ def instantiate_steppers(model, steps, selected_steps, step_kwargs=None):
     return steps
 
 
-def assign_step_methods(model, step=None, methods=STEP_METHODS,
-                        step_kwargs=None):
+def assign_step_methods(model, step=None, methods=STEP_METHODS, step_kwargs=None):
     """Assign model variables to appropriate step methods.
 
     Passing a specified model will auto-assign its constituent stochastic
@@ -140,14 +168,15 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS,
             if has_gradient:
                 try:
                     tg.grad(model.logpt, var)
-                except (AttributeError,
-                        NotImplementedError,
-                        tg.NullTypeGradError):
+                except (AttributeError, NotImplementedError, tg.NullTypeGradError):
                     has_gradient = False
             # select the best method
-            selected = max(methods, key=lambda method,
-                           var=var, has_gradient=has_gradient:
-                           method._competence(var, has_gradient))
+            selected = max(
+                methods,
+                key=lambda method, var=var, has_gradient=has_gradient: method._competence(
+                    var, has_gradient
+                ),
+            )
             selected_steps[selected].append(var)
 
     return instantiate_steppers(model, steps, selected_steps, step_kwargs)
@@ -155,17 +184,23 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS,
 
 def _print_step_hierarchy(s, level=0):
     if isinstance(s, (list, tuple)):
-        _log.info('>' * level + 'list')
+        _log.info(">" * level + "list")
         for i in s:
-            _print_step_hierarchy(i, level+1)
+            _print_step_hierarchy(i, level + 1)
     elif isinstance(s, CompoundStep):
-        _log.info('>' * level + 'CompoundStep')
+        _log.info(">" * level + "CompoundStep")
         for i in s.methods:
-            _print_step_hierarchy(i, level+1)
+            _print_step_hierarchy(i, level + 1)
     else:
-        varnames = ', '.join([get_untransformed_name(v.name) if is_transformed_name(v.name)
-                              else v.name for v in s.vars])
-        _log.info('>' * level + '{}: [{}]'.format(s.__class__.__name__, varnames))
+        varnames = ", ".join(
+            [
+                get_untransformed_name(v.name)
+                if is_transformed_name(v.name)
+                else v.name
+                for v in s.vars
+            ]
+        )
+        _log.info(">" * level + "{}: [{}]".format(s.__class__.__name__, varnames))
 
 
 def _cpu_count():
@@ -177,9 +212,11 @@ def _cpu_count():
     """
     try:
         import psutil
+
         cpus = psutil.cpu_count(False)
     except ImportError:
         import multiprocessing
+
         try:
             cpus = multiprocessing.cpu_count() // 2
         except NotImplementedError:
@@ -189,10 +226,29 @@ def _cpu_count():
     return cpus
 
 
-def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=None, chain_idx=0,
-           chains=None, cores=None, tune=500, nuts_kwargs=None, step_kwargs=None, progressbar=True,
-           model=None, random_seed=None, live_plot=False, discard_tuned_samples=True,
-           live_plot_kwargs=None, compute_convergence_checks=True, use_mmap=False, **kwargs):
+def sample(
+    draws=500,
+    step=None,
+    init="auto",
+    n_init=200000,
+    start=None,
+    trace=None,
+    chain_idx=0,
+    chains=None,
+    cores=None,
+    tune=500,
+    nuts_kwargs=None,
+    step_kwargs=None,
+    progressbar=True,
+    model=None,
+    random_seed=None,
+    live_plot=False,
+    discard_tuned_samples=True,
+    live_plot_kwargs=None,
+    compute_convergence_checks=True,
+    use_mmap=False,
+    **kwargs
+):
     """Draw samples from the posterior using the given step methods.
 
     Multiple step methods are supported via compound step methods.
@@ -323,25 +379,29 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N
     if isinstance(step, pm.step_methods.smc.SMC):
         if step_kwargs is None:
             step_kwargs = {}
-        test_folder = mkdtemp(prefix='SMC_TEST')
-        trace = smc.sample_smc(draws=draws,
-                               step=step,
-                               progressbar=progressbar,
-                               model=model,
-                               random_seed=random_seed)
+        test_folder = mkdtemp(prefix="SMC_TEST")
+        trace = smc.sample_smc(
+            draws=draws,
+            step=step,
+            progressbar=progressbar,
+            model=model,
+            random_seed=random_seed,
+        )
     else:
         if cores is None:
             cores = min(4, _cpu_count())
-        if 'njobs' in kwargs:
-            cores = kwargs['njobs']
+        if "njobs" in kwargs:
+            cores = kwargs["njobs"]
             warnings.warn(
                 "The njobs argument has been deprecated. Use cores instead.",
-                DeprecationWarning)
-        if 'nchains' in kwargs:
-            chains = kwargs['nchains']
+                DeprecationWarning,
+            )
+        if "nchains" in kwargs:
+            chains = kwargs["nchains"]
             warnings.warn(
                 "The nchains argument has been deprecated. Use chains instead.",
-                DeprecationWarning)
+                DeprecationWarning,
+            )
         if chains is None:
             chains = max(2, cores)
         if isinstance(start, dict):
@@ -356,12 +416,14 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N
             random_seed = [np.random.randint(2 ** 30) for _ in range(chains)]
         if not isinstance(random_seed, Iterable):
             raise TypeError(
-                'Invalid value for `random_seed`. Must be tuple, list or int')
-        if 'chain' in kwargs:
-            chain_idx = kwargs['chain']
+                "Invalid value for `random_seed`. Must be tuple, list or int"
+            )
+        if "chain" in kwargs:
+            chain_idx = kwargs["chain"]
             warnings.warn(
                 "The chain argument has been deprecated. Use chain_idx instead.",
-                DeprecationWarning)
+                DeprecationWarning,
+            )
 
         if start is not None:
             for start_vals in start:
@@ -380,27 +442,35 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N
         if nuts_kwargs is not None:
             if step_kwargs is not None:
                 raise ValueError("Specify only one of step_kwargs and nuts_kwargs")
-            step_kwargs = {'nuts': nuts_kwargs}
+            step_kwargs = {"nuts": nuts_kwargs}
 
         if model.ndim == 0:
-            raise ValueError('The model does not contain any free variables.')
+            raise ValueError("The model does not contain any free variables.")
 
         if step is None and init is not None and all_continuous(model.vars):
             try:
                 # By default, try to use NUTS
-                _log.info('Auto-assigning NUTS sampler...')
+                _log.info("Auto-assigning NUTS sampler...")
                 args = step_kwargs if step_kwargs is not None else {}
-                args = args.get('nuts', {})
-                start_, step = init_nuts(init=init, chains=chains, n_init=n_init,
-                                         model=model, random_seed=random_seed,
-                                         progressbar=progressbar, **args)
+                args = args.get("nuts", {})
+                start_, step = init_nuts(
+                    init=init,
+                    chains=chains,
+                    n_init=n_init,
+                    model=model,
+                    random_seed=random_seed,
+                    progressbar=progressbar,
+                    **args
+                )
                 if start is None:
                     start = start_
             except (AttributeError, NotImplementedError, tg.NullTypeGradError):
                 # gradient computation failed
-                _log.info("Initializing NUTS failed. "
-                          "Falling back to elementwise auto-assignment.")
-                _log.debug('Exception in init nuts', exec_info=True)
+                _log.info(
+                    "Initializing NUTS failed. "
+                    "Falling back to elementwise auto-assignment."
+                )
+                _log.debug("Exception in init nuts", exec_info=True)
                 step = assign_step_methods(model, step, step_kwargs=step_kwargs)
         else:
             step = assign_step_methods(model, step, step_kwargs=step_kwargs)
@@ -412,50 +482,58 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N
         if isinstance(start, dict):
             start = [start] * chains
 
-        sample_args = {'draws': draws,
-                       'step': step,
-                       'start': start,
-                       'trace': trace,
-                       'chain': chain_idx,
-                       'chains': chains,
-                       'tune': tune,
-                       'progressbar': progressbar,
-                       'model': model,
-                       'random_seed': random_seed,
-                       'live_plot': live_plot,
-                       'live_plot_kwargs': live_plot_kwargs,
-                       'cores': cores,
-                       'use_mmap': use_mmap}
+        sample_args = {
+            "draws": draws,
+            "step": step,
+            "start": start,
+            "trace": trace,
+            "chain": chain_idx,
+            "chains": chains,
+            "tune": tune,
+            "progressbar": progressbar,
+            "model": model,
+            "random_seed": random_seed,
+            "live_plot": live_plot,
+            "live_plot_kwargs": live_plot_kwargs,
+            "cores": cores,
+            "use_mmap": use_mmap,
+        }
 
         sample_args.update(kwargs)
 
-        has_population_samplers = np.any([isinstance(m, arraystep.PopulationArrayStepShared)
-            for m in (step.methods if isinstance(step, CompoundStep) else [step])])
+        has_population_samplers = np.any(
+            [
+                isinstance(m, arraystep.PopulationArrayStepShared)
+                for m in (step.methods if isinstance(step, CompoundStep) else [step])
+            ]
+        )
 
         parallel = cores > 1 and chains > 1 and not has_population_samplers
         if parallel:
-            _log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, cores))
+            _log.info(
+                "Multiprocess sampling ({} chains in {} jobs)".format(chains, cores)
+            )
             _print_step_hierarchy(step)
             try:
                 trace = _mp_sample(**sample_args)
             except pickle.PickleError:
                 _log.warning("Could not pickle model, sampling singlethreaded.")
-                _log.debug('Pickling error:', exec_info=True)
+                _log.debug("Pickling error:", exc_info=True)
                 parallel = False
             except AttributeError as e:
                 if str(e).startswith("AttributeError: Can't pickle"):
                     _log.warning("Could not pickle model, sampling singlethreaded.")
-                    _log.debug('Pickling error:', exec_info=True)
+                    _log.debug("Pickling error:", exc_info=True)
                     parallel = False
                 else:
                     raise
         if not parallel:
             if has_population_samplers:
-                _log.info('Population sampling ({} chains)'.format(chains))
+                _log.info("Population sampling ({} chains)".format(chains))
                 _print_step_hierarchy(step)
                 trace = _sample_population(**sample_args)
             else:
-                _log.info('Sequential sampling ({} chains in 1 job)'.format(chains))
+                _log.info("Sequential sampling ({} chains in 1 job)".format(chains))
                 _print_step_hierarchy(step)
                 trace = _sample_many(**sample_args)
 
@@ -463,8 +541,10 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N
         trace = trace[discard:]
 
         if compute_convergence_checks:
-            if draws-tune < 100:
-                warnings.warn("The number of samples is too small to check convergence reliably.")
+            if draws - tune < 100:
+                warnings.warn(
+                    "The number of samples is too small to check convergence reliably."
+                )
             else:
                 trace.report._run_convergence_checks(trace, model)
 
@@ -476,7 +556,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N
 def _check_start_shape(model, start):
     if not isinstance(start, dict):
         raise TypeError("start argument must be a dict or an array-like of dicts")
-    e = ''
+    e = ""
     for var in model.vars:
         if var.name in start.keys():
             var_shape = var.shape.tag.test_value
@@ -490,23 +570,28 @@ def _check_start_shape(model, start):
             else:
                 # if model var has a specified shape
                 if var_shape:
-                    e += "\nExpected shape {} for var " \
-                         "'{}', got scalar {}".format(
-                             tuple(var_shape), var.name, start[var.name]
-                         )
+                    e += "\nExpected shape {} for var '{}', got scalar {}".format(
+                        tuple(var_shape), var.name, start[var.name]
+                    )
 
-    if e != '':
+    if e != "":
         raise ValueError("Bad shape for start argument:{}".format(e))
 
 
 def _sample_many(draws, chain, chains, start, random_seed, step, **kwargs):
     traces = []
     for i in range(chains):
-        trace = _sample(draws=draws, chain=chain + i, start=start[i],
-                        step=step, random_seed=random_seed[i], **kwargs)
+        trace = _sample(
+            draws=draws,
+            chain=chain + i,
+            start=start[i],
+            step=step,
+            random_seed=random_seed[i],
+            **kwargs
+        )
         if trace is None:
             if len(traces) == 0:
-                raise ValueError('Sampling stopped before a sample was created.')
+                raise ValueError("Sampling stopped before a sample was created.")
             else:
                 break
         elif len(trace) < draws:
@@ -518,12 +603,31 @@ def _sample_many(draws, chain, chains, start, random_seed, step, **kwargs):
     return MultiTrace(traces)
 
 
-def _sample_population(draws, chain, chains, start, random_seed, step, tune,
-                       model, progressbar=None, parallelize=False, **kwargs):
+def _sample_population(
+    draws,
+    chain,
+    chains,
+    start,
+    random_seed,
+    step,
+    tune,
+    model,
+    progressbar=None,
+    parallelize=False,
+    **kwargs
+):
     # create the generator that iterates all chains in parallel
     chains = [chain + c for c in range(chains)]
-    sampling = _prepare_iter_population(draws, chains, step, start, parallelize,
-                                        tune=tune, model=model, random_seed=random_seed)
+    sampling = _prepare_iter_population(
+        draws,
+        chains,
+        step,
+        start,
+        parallelize,
+        tune=tune,
+        model=model,
+        random_seed=random_seed,
+    )
 
     if progressbar:
         sampling = tqdm(sampling, total=draws)
@@ -535,14 +639,24 @@ def _sample_population(draws, chain, chains, start, random_seed, step, tune,
     return MultiTrace(latest_traces)
 
 
-def _sample(chain, progressbar, random_seed, start, draws=None, step=None,
-            trace=None, tune=None, model=None, live_plot=False,
-            live_plot_kwargs=None, **kwargs):
-    skip_first = kwargs.get('skip_first', 0)
-    refresh_every = kwargs.get('refresh_every', 100)
-
-    sampling = _iter_sample(draws, step, start, trace, chain,
-                            tune, model, random_seed)
+def _sample(
+    chain,
+    progressbar,
+    random_seed,
+    start,
+    draws=None,
+    step=None,
+    trace=None,
+    tune=None,
+    model=None,
+    live_plot=False,
+    live_plot_kwargs=None,
+    **kwargs
+):
+    skip_first = kwargs.get("skip_first", 0)
+    refresh_every = kwargs.get("refresh_every", 100)
+
+    sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed)
     if progressbar:
         sampling = tqdm(sampling, total=draws)
     try:
@@ -556,7 +670,9 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None,
                     if it == skip_first:
                         ax = plots.traceplot(trace, live_plot=False, **live_plot_kwargs)
                     elif (it - skip_first) % refresh_every == 0 or it == draws - 1:
-                        plots.traceplot(trace, ax=ax, live_plot=True, **live_plot_kwargs)
+                        plots.traceplot(
+                            trace, ax=ax, live_plot=True, **live_plot_kwargs
+                        )
     except KeyboardInterrupt:
         pass
     finally:
@@ -565,8 +681,16 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None,
     return strace
 
 
-def iter_sample(draws, step, start=None, trace=None, chain=0, tune=None,
-                model=None, random_seed=None):
+def iter_sample(
+    draws,
+    step,
+    start=None,
+    trace=None,
+    chain=0,
+    tune=None,
+    model=None,
+    random_seed=None,
+):
     """Generator that returns a trace on each iteration using the given
     step method.  Multiple step methods supported via compound step
     method returns the amount of time taken.
@@ -600,20 +724,27 @@ def iter_sample(draws, step, start=None, trace=None, chain=0, tune=None,
         for trace in iter_sample(500, step):
             ...
     """
-    sampling = _iter_sample(draws, step, start, trace, chain, tune,
-                            model, random_seed)
+    sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed)
     for i, strace in enumerate(sampling):
-        yield MultiTrace([strace[:i + 1]])
-
-
-def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None,
-                 model=None, random_seed=None):
+        yield MultiTrace([strace[: i + 1]])
+
+
+def _iter_sample(
+    draws,
+    step,
+    start=None,
+    trace=None,
+    chain=0,
+    tune=None,
+    model=None,
+    random_seed=None,
+):
     model = modelcontext(model)
     draws = int(draws)
     if random_seed is not None:
         np.random.seed(random_seed)
     if draws < 1:
-        raise ValueError('Argument `draws` must be greater than 0.')
+        raise ValueError("Argument `draws` must be greater than 0.")
 
     if start is None:
         start = {}
@@ -654,7 +785,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None,
             yield strace
     except KeyboardInterrupt:
         strace.close()
-        if hasattr(step, 'warnings'):
+        if hasattr(step, "warnings"):
             warns = step.warnings()
             strace._add_warnings(warns)
         raise
@@ -663,7 +794,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None,
         raise
     else:
         strace.close()
-        if hasattr(step, 'warnings'):
+        if hasattr(step, "warnings"):
             warns = step.warnings()
             strace._add_warnings(warns)
 
@@ -692,15 +823,16 @@ def __init__(self, steppers, parallelize):
         if parallelize and sys.version_info >= (3, 4):
             try:
                 # configure a child process for each stepper
-                _log.info('Attempting to parallelize chains.')
+                _log.info("Attempting to parallelize chains.")
                 import multiprocessing
+
                 for c, stepper in enumerate(tqdm(steppers)):
                     slave_end, master_end = multiprocessing.Pipe()
                     stepper_dumps = pickle.dumps(stepper, protocol=4)
                     process = multiprocessing.Process(
                         target=self.__class__._run_slave,
                         args=(c, stepper_dumps, slave_end),
-                        name='ChainWalker{}'.format(c)
+                        name="ChainWalker{}".format(c),
                     )
                     # we want the child process to exit if the parent is terminated
                     process.daemon = True
@@ -712,18 +844,23 @@ def __init__(self, steppers, parallelize):
                     self._processes.append(process)
                 self.is_parallelized = True
             except Exception:
-                _log.info('Population parallelization failed. '
-                          'Falling back to sequential stepping of chains.')
-                _log.debug('Error was: ', exec_info=True)
+                _log.info(
+                    "Population parallelization failed. "
+                    "Falling back to sequential stepping of chains."
+                )
+                _log.debug("Error was: ", exc_info=True)
         else:
             if parallelize:
-                warnings.warn('Population parallelization is only supported '
-                              'on Python 3.4 and higher.  All {} chains will '
-                              'run sequentially on one process.'
-                              .format(self.nchains))
+                warnings.warn(
+                    "Population parallelization is only supported "
+                    "on Python 3.4 and higher.  All {} chains will "
+                    "run sequentially on one process.".format(self.nchains)
+                )
             else:
-                _log.info('Chains are not parallelized. You can enable this by passing '
-                          'pm.sample(parallelize=True).')
+                _log.info(
+                    "Chains are not parallelized. You can enable this by passing "
+                    "pm.sample(parallelize=True)."
+                )
         return super(PopulationStepper, self).__init__()
 
     def __enter__(self):
@@ -738,7 +875,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
                 for process in self._processes:
                     process.join(timeout=3)
             except Exception:
-                _log.warning('Termination failed.')
+                _log.warning("Termination failed.")
         return
 
     @staticmethod
@@ -762,7 +899,9 @@ def _run_slave(c, stepper_dumps, slave_end):
             # but rather a CompoundStep. PopulationArrayStepShared.population
             # has to be updated, therefore we identify the substeppers first.
             population_steppers = []
-            for sm in (stepper.methods if isinstance(stepper, CompoundStep) else [stepper]):
+            for sm in (
+                stepper.methods if isinstance(stepper, CompoundStep) else [stepper]
+            ):
                 if isinstance(sm, arraystep.PopulationArrayStepShared):
                     population_steppers.append(sm)
             while True:
@@ -781,7 +920,7 @@ def _run_slave(c, stepper_dumps, slave_end):
                 update = stepper.step(population[c])
                 slave_end.send(update)
         except Exception:
-            _log.exception('ChainWalker{}'.format(c))
+            _log.exception("ChainWalker{}".format(c))
         return
 
     def step(self, tune_stop, population):
@@ -814,8 +953,9 @@ def step(self, tune_stop, population):
         return updates
 
 
-def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None,
-                             model=None, random_seed=None):
+def _prepare_iter_population(
+    draws, chains, step, start, parallelize, tune=None, model=None, random_seed=None
+):
     """Prepares a PopulationStepper and traces for population sampling.
 
     Returns
@@ -830,7 +970,7 @@ def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None,
     if random_seed is not None:
         np.random.seed(random_seed)
     if draws < 1:
-        raise ValueError('Argument `draws` should be above 0.')
+        raise ValueError("Argument `draws` should be above 0.")
 
     # The initialization of traces, samplers and points must happen in the right order:
     # 1. traces are initialized and update_start_vals configures variable transforms
@@ -862,7 +1002,7 @@ def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None,
         else:
             chainstep = copy(step)
         # link population samplers to the shared population state
-        for sm in (chainstep.methods if isinstance(step, CompoundStep) else [chainstep]):
+        for sm in chainstep.methods if isinstance(step, CompoundStep) else [chainstep]:
             if isinstance(sm, arraystep.PopulationArrayStepShared):
                 sm.link_population(population, c)
         steppers[c] = chainstep
@@ -922,7 +1062,7 @@ def _iter_population(draws, tune, popstep, steppers, traces, points):
     except KeyboardInterrupt:
         for c, strace in enumerate(traces):
             strace.close()
-            if hasattr(steppers[c], 'report'):
+            if hasattr(steppers[c], "report"):
                 steppers[c].report._finalize(strace)
         raise
     except BaseException:
@@ -932,7 +1072,7 @@ def _iter_population(draws, tune, popstep, steppers, traces, points):
     else:
         for c, strace in enumerate(traces):
             strace.close()
-            if hasattr(steppers[c], 'report'):
+            if hasattr(steppers[c], "report"):
                 steppers[c].report._finalize(strace)
 
 
@@ -948,18 +1088,30 @@ def _choose_backend(trace, chain, shortcuts=None, **kwds):
         shortcuts = pm.backends._shortcuts
 
     try:
-        backend = shortcuts[trace]['backend']
-        name = shortcuts[trace]['name']
+        backend = shortcuts[trace]["backend"]
+        name = shortcuts[trace]["name"]
         return backend(name, **kwds)
     except TypeError:
         return NDArray(vars=trace, **kwds)
     except KeyError:
-        raise ValueError('Argument `trace` is invalid.')
-
-
-def _mp_sample(draws, tune, step, chains, cores, chain, random_seed,
-               start, progressbar, trace=None, model=None, use_mmap=False,
-               **kwargs):
+        raise ValueError("Argument `trace` is invalid.")
+
+
+def _mp_sample(
+    draws,
+    tune,
+    step,
+    chains,
+    cores,
+    chain,
+    random_seed,
+    start,
+    progressbar,
+    trace=None,
+    model=None,
+    use_mmap=False,
+    **kwargs
+):
 
     if sys.version_info.major >= 3:
         import pymc3.parallel_sampling as ps
@@ -983,8 +1135,8 @@ def _mp_sample(draws, tune, step, chains, cores, chain, random_seed,
             traces.append(strace)
 
         sampler = ps.ParallelSampler(
-            draws, tune, chains, cores, random_seed, start, step,
-            chain, progressbar)
+            draws, tune, chains, cores, random_seed, start, step, chain, progressbar
+        )
         try:
             with sampler:
                 for draw in sampler:
@@ -1010,9 +1162,16 @@ def _mp_sample(draws, tune, step, chains, cores, chain, random_seed,
         pbars = [progressbar] + [False] * (chains - 1)
         jobs = (
             delayed(_sample)(
-                chain=args[0], progressbar=args[1], random_seed=args[2],
-                start=args[3], draws=draws, step=step, trace=trace,
-                tune=tune, model=model, **kwargs
+                chain=args[0],
+                progressbar=args[1],
+                random_seed=args[2],
+                start=args[3],
+                draws=draws,
+                step=step,
+                trace=trace,
+                tune=tune,
+                model=model,
+                **kwargs
             )
             for args in zip(chain_nums, pbars, random_seed, start)
         )
@@ -1032,7 +1191,7 @@ def _choose_chains(traces, tune):
 
     lengths = [max(0, len(trace) - tune) for trace in traces]
     if not sum(lengths):
-        raise ValueError('Not enough samples to build a trace.')
+        raise ValueError("Not enough samples to build a trace.")
 
     idxs = np.argsort(lengths)[::-1]
     l_sort = np.array(lengths)[idxs]
@@ -1059,8 +1218,15 @@ def stop_tuning(step):
     return step
 
 
-def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size=None,
-                                random_seed=None, progressbar=True):
+def sample_posterior_predictive(
+    trace,
+    samples=None,
+    model=None,
+    vars=None,
+    size=None,
+    random_seed=None,
+    progressbar=True,
+):
     """Generate posterior predictive samples from a model given a trace.
 
     Parameters
@@ -1116,8 +1282,9 @@ def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size
     varnames = [var.name for var in vars]
 
     # draw once to inspect the shape
-    var_values = list(zip(varnames,
-                          draw_values(vars, point=model.test_point, size=size)))
+    var_values = list(
+        zip(varnames, draw_values(vars, point=model.test_point, size=size))
+    )
     ppc_trace = defaultdict(list)
     for varname, value in var_values:
         ppc_trace[varname] = np.zeros((samples,) + value.shape, value.dtype)
@@ -1146,13 +1313,14 @@ def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size
 
 def sample_ppc(*args, **kwargs):
     """This method is deprecated.  Please use :func:`~sampling.sample_posterior_predictive`"""
-    message = 'sample_ppc() is deprecated.  Please use sample_posterior_predictive()'
+    message = "sample_ppc() is deprecated.  Please use sample_posterior_predictive()"
     warnings.warn(message, DeprecationWarning, stacklevel=2)
     return sample_posterior_predictive(*args, **kwargs)
 
 
-def sample_posterior_predictive_w(traces, samples=None, models=None, weights=None,
-                                    random_seed=None, progressbar=True):
+def sample_posterior_predictive_w(
+    traces, samples=None, models=None, weights=None, random_seed=None, progressbar=True
+):
     """Generate weighted posterior predictive samples from a list of models and
     a list of traces according to a set of weights.
 
@@ -1194,22 +1362,21 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non
         weights = [1] * len(traces)
 
     if len(traces) != len(weights):
-        raise ValueError('The number of traces and weights should be the same')
+        raise ValueError("The number of traces and weights should be the same")
 
     if len(models) != len(weights):
-        raise ValueError('The number of models and weights should be the same')
+        raise ValueError("The number of models and weights should be the same")
 
     length_morv = len(models[0].observed_RVs)
     if not all(len(i.observed_RVs) == length_morv for i in models):
-        raise ValueError(
-            'The number of observed RVs should be the same for all models')
+        raise ValueError("The number of observed RVs should be the same for all models")
 
     weights = np.asarray(weights)
     p = weights / np.sum(weights)
 
     min_tr = min([len(i) * i.nchains for i in traces])
 
-    n = (min_tr * p).astype('int')
+    n = (min_tr * p).astype("int")
     # ensure n sum up to min_tr
     idx = np.argmax(n)
     n[idx] = n[idx] + min_tr - np.sum(n)
@@ -1239,7 +1406,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non
     if len(lengths) == 1:
         size = [None for i in variables]
     elif len(lengths) > 2:
-        raise ValueError('Observed variables could not be broadcast together')
+        raise ValueError("Observed variables could not be broadcast together")
     else:
         size = []
         x = np.zeros(shape=lengths[0])
@@ -1268,10 +1435,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non
             var = variables[idx]
             # TODO sample_posterior_predictive_w is currently only work for model with
             # one observed.
-            ppc[var.name].append(draw_values([var],
-                                             point=param,
-                                             size=size[idx]
-                                             )[0])
+            ppc[var.name].append(draw_values([var], point=param, size=size[idx])[0])
 
     except KeyboardInterrupt:
         pass
@@ -1285,7 +1449,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non
 
 def sample_ppc_w(*args, **kwargs):
     """This method is deprecated.  Please use :func:`~sampling.sample_posterior_predictive_w`"""
-    message = 'sample_ppc() is deprecated.  Please use sample_posterior_predictive_w()'
+    message = "sample_ppc_w() is deprecated.  Please use sample_posterior_predictive_w()"
     warnings.warn(message, DeprecationWarning, stacklevel=2)
     return sample_posterior_predictive_w(*args, **kwargs)
 
@@ -1331,12 +1495,21 @@ def sample_prior_predictive(samples=500, model=None, vars=None, random_seed=None
         elif is_transformed_name(var_name):
             untransformed = get_untransformed_name(var_name)
             if untransformed in data:
-                prior[var_name] = model[untransformed].transformation.forward_val(data[untransformed])
+                prior[var_name] = model[untransformed].transformation.forward_val(
+                    data[untransformed]
+                )
     return prior
 
 
-def init_nuts(init='auto', chains=1, n_init=500000, model=None,
-              random_seed=None, progressbar=True, **kwargs):
+def init_nuts(
+    init="auto",
+    chains=1,
+    n_init=500000,
+    model=None,
+    random_seed=None,
+    progressbar=True,
+    **kwargs
+):
     """Set up the mass matrix initialization for NUTS.
 
     NUTS convergence and sampling speed is extremely dependent on the
@@ -1386,42 +1559,40 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None,
     """
     model = modelcontext(model)
 
-    vars = kwargs.get('vars', model.vars)
+    vars = kwargs.get("vars", model.vars)
     if set(vars) != set(model.vars):
-        raise ValueError('Must use init_nuts on all variables of a model.')
+        raise ValueError("Must use init_nuts on all variables of a model.")
     if not all_continuous(vars):
-        raise ValueError('init_nuts can only be used for models with only '
-                         'continuous variables.')
+        raise ValueError(
+            "init_nuts can only be used for models with only " "continuous variables."
+        )
 
     if not isinstance(init, str):
-        raise TypeError('init must be a string.')
+        raise TypeError("init must be a string.")
 
     if init is not None:
         init = init.lower()
 
-    if init == 'auto':
-        init = 'jitter+adapt_diag'
+    if init == "auto":
+        init = "jitter+adapt_diag"
 
-    _log.info('Initializing NUTS using {}...'.format(init))
+    _log.info("Initializing NUTS using {}...".format(init))
 
     if random_seed is not None:
         random_seed = int(np.atleast_1d(random_seed)[0])
         np.random.seed(random_seed)
 
     cb = [
-        pm.callbacks.CheckParametersConvergence(
-            tolerance=1e-2, diff='absolute'),
-        pm.callbacks.CheckParametersConvergence(
-            tolerance=1e-2, diff='relative'),
+        pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="absolute"),
+        pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"),
     ]
 
-    if init == 'adapt_diag':
+    if init == "adapt_diag":
         start = [model.test_point] * chains
         mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
         var = np.ones_like(mean)
-        potential = quadpotential.QuadPotentialDiagAdapt(
-            model.ndim, mean, var, 10)
-    elif init == 'jitter+adapt_diag':
+        potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, var, 10)
+    elif init == "jitter+adapt_diag":
         start = []
         for _ in range(chains):
             mean = {var: val.copy() for var, val in model.test_point.items()}
@@ -1430,12 +1601,13 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None,
             start.append(mean)
         mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
         var = np.ones_like(mean)
-        potential = quadpotential.QuadPotentialDiagAdapt(
-            model.ndim, mean, var, 10)
-    elif init == 'advi+adapt_diag_grad':
+        potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, var, 10)
+    elif init == "advi+adapt_diag_grad":
         approx = pm.fit(
             random_seed=random_seed,
-            n=n_init, method='advi', model=model,
+            n=n_init,
+            method="advi",
+            model=model,
             callbacks=cb,
             progressbar=progressbar,
             obj_optimizer=pm.adagrad_window,
@@ -1448,11 +1620,14 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None,
         mean = model.dict_to_array(mean)
         weight = 50
         potential = quadpotential.QuadPotentialDiagAdaptGrad(
-            model.ndim, mean, cov, weight)
-    elif init == 'advi+adapt_diag':
+            model.ndim, mean, cov, weight
+        )
+    elif init == "advi+adapt_diag":
         approx = pm.fit(
             random_seed=random_seed,
-            n=n_init, method='advi', model=model,
+            n=n_init,
+            method="advi",
+            model=model,
             callbacks=cb,
             progressbar=progressbar,
             obj_optimizer=pm.adagrad_window,
@@ -1464,51 +1639,52 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None,
         mean = approx.bij.rmap(approx.mean.get_value())
         mean = model.dict_to_array(mean)
         weight = 50
-        potential = quadpotential.QuadPotentialDiagAdapt(
-            model.ndim, mean, cov, weight)
-    elif init == 'advi':
+        potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, cov, weight)
+    elif init == "advi":
         approx = pm.fit(
             random_seed=random_seed,
-            n=n_init, method='advi', model=model,
+            n=n_init,
+            method="advi",
+            model=model,
             callbacks=cb,
             progressbar=progressbar,
-            obj_optimizer=pm.adagrad_window
+            obj_optimizer=pm.adagrad_window,
         )  # type: pm.MeanField
         start = approx.sample(draws=chains)
         start = list(start)
         stds = approx.bij.rmap(approx.std.eval())
         cov = model.dict_to_array(stds) ** 2
         potential = quadpotential.QuadPotentialDiag(cov)
-    elif init == 'advi_map':
+    elif init == "advi_map":
         start = pm.find_MAP(include_transformed=True)
         approx = pm.MeanField(model=model, start=start)
         pm.fit(
             random_seed=random_seed,
-            n=n_init, method=pm.KLqp(approx),
+            n=n_init,
+            method=pm.KLqp(approx),
             callbacks=cb,
             progressbar=progressbar,
-            obj_optimizer=pm.adagrad_window
+            obj_optimizer=pm.adagrad_window,
         )
         start = approx.sample(draws=chains)
         start = list(start)
         stds = approx.bij.rmap(approx.std.eval())
         cov = model.dict_to_array(stds) ** 2
         potential = quadpotential.QuadPotentialDiag(cov)
-    elif init == 'map':
+    elif init == "map":
         start = pm.find_MAP(include_transformed=True)
         cov = pm.find_hessian(point=start)
         start = [start] * chains
         potential = quadpotential.QuadPotentialFull(cov)
-    elif init == 'nuts':
-        init_trace = pm.sample(draws=n_init, step=pm.NUTS(),
-                               tune=n_init // 2,
-                               random_seed=random_seed)
+    elif init == "nuts":
+        init_trace = pm.sample(
+            draws=n_init, step=pm.NUTS(), tune=n_init // 2, random_seed=random_seed
+        )
         cov = np.atleast_1d(pm.trace_cov(init_trace))
         start = list(np.random.choice(init_trace, chains))
         potential = quadpotential.QuadPotentialFull(cov)
     else:
-        raise ValueError(
-            'Unknown initializer: {}.'.format(init))
+        raise ValueError("Unknown initializer: {}.".format(init))
 
     step = pm.NUTS(potential=potential, model=model, **kwargs)
 
diff --git a/pymc3/stats.py b/pymc3/stats.py
index 445b54b0c7..cb20348d76 100644
--- a/pymc3/stats.py
+++ b/pymc3/stats.py
@@ -16,14 +16,25 @@
 import pymc3 as pm
 from pymc3.theanof import floatX
 
-if pkg_resources.get_distribution('scipy').version < '1.0.0':
+if pkg_resources.get_distribution("scipy").version < "1.0.0":
     from scipy.misc import logsumexp
 else:
     from scipy.special import logsumexp
 
 
-__all__ = ['autocorr', 'autocov', 'waic', 'loo', 'hpd', 'quantiles',
-           'mc_error', 'summary', 'compare', 'bfmi', 'r2_score']
+__all__ = [
+    "autocorr",
+    "autocov",
+    "waic",
+    "loo",
+    "hpd",
+    "quantiles",
+    "mc_error",
+    "summary",
+    "compare",
+    "bfmi",
+    "r2_score",
+]
 
 
 def statfunc(f):
@@ -34,23 +45,23 @@ def statfunc(f):
 
     def wrapped_f(pymc3_obj, *args, **kwargs):
         try:
-            vars = kwargs.pop('vars',  pymc3_obj.varnames)
-            chains = kwargs.pop('chains', pymc3_obj.chains)
+            vars = kwargs.pop("vars", pymc3_obj.varnames)
+            chains = kwargs.pop("chains", pymc3_obj.chains)
         except AttributeError:
             # If fails, assume that raw data was passed.
             return f(pymc3_obj, *args, **kwargs)
 
-        burn = kwargs.pop('burn', 0)
-        thin = kwargs.pop('thin', 1)
-        combine = kwargs.pop('combine', False)
+        burn = kwargs.pop("burn", 0)
+        thin = kwargs.pop("thin", 1)
+        combine = kwargs.pop("combine", False)
         # Remove outer level chain keys if only one chain)
-        squeeze = kwargs.pop('squeeze', True)
+        squeeze = kwargs.pop("squeeze", True)
 
         results = {chain: {} for chain in chains}
         for var in vars:
-            samples = pymc3_obj.get_values(var, chains=chains, burn=burn,
-                                           thin=thin, combine=combine,
-                                           squeeze=False)
+            samples = pymc3_obj.get_values(
+                var, chains=chains, burn=burn, thin=thin, combine=combine, squeeze=False
+            )
             for chain, data in zip(chains, samples):
                 results[chain][var] = f(np.squeeze(data), *args, **kwargs)
 
@@ -82,7 +93,7 @@ def autocorr(x, lag=None):
     y = x - x.mean()
     n = len(y)
     result = fftconvolve(y, y[::-1])
-    acorr = result[len(result) // 2:]
+    acorr = result[len(result) // 2 :]
     acorr /= np.arange(n, 0, -1)
     acorr /= acorr[0]
     if lag is None:
@@ -91,7 +102,8 @@ def autocorr(x, lag=None):
         warnings.warn(
             "The `lag` argument has been deprecated. If you want to get "
             "the value of a specific lag please call `autocorr(x)[lag]`.",
-            DeprecationWarning)
+            DeprecationWarning,
+        )
         return acorr[lag]
 
 
@@ -117,7 +129,8 @@ def autocov(x, lag=None):
         warnings.warn(
             "The `lag` argument has been deprecated. If you want to get "
             "the value of a specific lag please call `autocov(x)[lag]`.",
-            DeprecationWarning)
+            DeprecationWarning,
+        )
         return acov[lag]
 
 
@@ -144,7 +157,7 @@ def _log_post_trace(trace, model=None, progressbar=False):
 
     def logp_vals_point(pt):
         if len(model.observed_RVs) == 0:
-            return floatX(np.array([], dtype='d'))
+            return floatX(np.array([], dtype="d"))
 
         logp_vals = []
         for var, logp in cached:
@@ -203,20 +216,22 @@ def waic(trace, model=None, pointwise=False, progressbar=False):
 
     log_py = _log_post_trace(trace, model, progressbar=progressbar)
     if log_py.size == 0:
-        raise ValueError('The model does not contain observed values.')
+        raise ValueError("The model does not contain observed values.")
 
     lppd_i = logsumexp(log_py, axis=0, b=1.0 / log_py.shape[0])
 
     vars_lpd = np.var(log_py, axis=0)
     warn_mg = 0
     if np.any(vars_lpd > 0.4):
-        warnings.warn("""For one or more samples the posterior variance of the
+        warnings.warn(
+            """For one or more samples the posterior variance of the
         log predictive densities exceeds 0.4. This could be indication of
         WAIC starting to fail see http://arxiv.org/abs/1507.04544 for details
-        """)
+        """
+        )
         warn_mg = 1
 
-    waic_i = - 2 * (lppd_i - vars_lpd)
+    waic_i = -2 * (lppd_i - vars_lpd)
 
     waic_se = np.sqrt(len(waic_i) * np.var(waic_i))
 
@@ -226,14 +241,16 @@ def waic(trace, model=None, pointwise=False, progressbar=False):
 
     if pointwise:
         if np.equal(waic, waic_i).all():
-            warnings.warn("""The point-wise WAIC is the same with the sum WAIC,
+            warnings.warn(
+                """The point-wise WAIC is the same with the sum WAIC,
             please double check the Observed RV in your model to make sure it
             returns element-wise logp.
-            """)
-        WAIC_r = namedtuple('WAIC_r', 'WAIC, WAIC_se, p_WAIC, var_warn, WAIC_i')
+            """
+            )
+        WAIC_r = namedtuple("WAIC_r", "WAIC, WAIC_se, p_WAIC, var_warn, WAIC_i")
         return WAIC_r(waic, waic_se, p_waic, warn_mg, waic_i)
     else:
-        WAIC_r = namedtuple('WAIC_r', 'WAIC, WAIC_se, p_WAIC, var_warn')
+        WAIC_r = namedtuple("WAIC_r", "WAIC, WAIC_se, p_WAIC, var_warn")
         return WAIC_r(waic, waic_se, p_waic, warn_mg)
 
 
@@ -273,46 +290,50 @@ def loo(trace, model=None, pointwise=False, reff=None, progressbar=False):
 
     if reff is None:
         if trace.nchains == 1:
-            reff = 1.
+            reff = 1.0
         else:
             eff = pm.effective_n(trace)
-            eff_ave = pm.stats.dict2pd(eff, 'eff').mean()
+            eff_ave = pm.stats.dict2pd(eff, "eff").mean()
             samples = len(trace) * trace.nchains
             reff = eff_ave / samples
 
     log_py = _log_post_trace(trace, model, progressbar=progressbar)
     if log_py.size == 0:
-        raise ValueError('The model does not contain observed values.')
+        raise ValueError("The model does not contain observed values.")
 
     lw, ks = _psislw(-log_py, reff)
     lw += log_py
 
     warn_mg = 0
     if np.any(ks > 0.7):
-        warnings.warn("""Estimated shape parameter of Pareto distribution is
+        warnings.warn(
+            """Estimated shape parameter of Pareto distribution is
         greater than 0.7 for one or more samples.
         You should consider using a more robust model, this is because
         importance sampling is less likely to work well if the marginal
         posterior and LOO posterior are very different. This is more likely to
-        happen with a non-robust model and highly influential observations.""")
+        happen with a non-robust model and highly influential observations."""
+        )
         warn_mg = 1
 
-    loo_lppd_i = - 2 * logsumexp(lw, axis=0)
+    loo_lppd_i = -2 * logsumexp(lw, axis=0)
     loo_lppd = loo_lppd_i.sum()
     loo_lppd_se = (len(loo_lppd_i) * np.var(loo_lppd_i)) ** 0.5
-    lppd = np.sum(logsumexp(log_py, axis=0, b=1. / log_py.shape[0]))
+    lppd = np.sum(logsumexp(log_py, axis=0, b=1.0 / log_py.shape[0]))
     p_loo = lppd + (0.5 * loo_lppd)
 
     if pointwise:
         if np.equal(loo_lppd, loo_lppd_i).all():
-            warnings.warn("""The point-wise LOO is the same with the sum LOO,
+            warnings.warn(
+                """The point-wise LOO is the same with the sum LOO,
             please double check the Observed RV in your model to make sure it
             returns element-wise logp.
-            """)
-        LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, shape_warn, LOO_i')
+            """
+            )
+        LOO_r = namedtuple("LOO_r", "LOO, LOO_se, p_LOO, shape_warn, LOO_i")
         return LOO_r(loo_lppd, loo_lppd_se, p_loo, warn_mg, loo_lppd_i)
     else:
-        LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, shape_warn')
+        LOO_r = namedtuple("LOO_r", "LOO, LOO_se, p_LOO, shape_warn")
         return LOO_r(loo_lppd, loo_lppd_se, p_loo, warn_mg)
 
 
@@ -335,13 +356,13 @@ def _psislw(lw, reff):
     """
     n, m = lw.shape
 
-    lw_out = np.copy(lw, order='F')
+    lw_out = np.copy(lw, order="F")
     kss = np.empty(m)
 
     # precalculate constants
-    cutoff_ind = - int(np.ceil(min(n / 5., 3 * (n / reff) ** 0.5))) - 1
+    cutoff_ind = -int(np.ceil(min(n / 5.0, 3 * (n / reff) ** 0.5))) - 1
     cutoffmin = np.log(np.finfo(float).tiny)
-    k_min = 1. / 3
+    k_min = 1.0 / 3
 
     # loop over sets of log weights
     for i, x in enumerate(lw_out.T):
@@ -405,10 +426,10 @@ def _gpdfit(x):
     prior_bs = 3
     prior_k = 10
     n = len(x)
-    m = 30 + int(n**0.5)
+    m = 30 + int(n ** 0.5)
 
     bs = 1 - np.sqrt(m / (np.arange(1, m + 1, dtype=float) - 0.5))
-    bs /= prior_bs * x[int(n/4 + 0.5) - 1]
+    bs /= prior_bs * x[int(n / 4 + 0.5) - 1]
     bs += 1 / x[-1]
 
     ks = np.log1p(-bs[:, None] * x).mean(axis=1)
@@ -426,10 +447,10 @@ def _gpdfit(x):
     # posterior mean for b
     b = np.sum(bs * w)
     # estimate for k
-    k = np.log1p(- b * x).mean()
+    k = np.log1p(-b * x).mean()
     # add prior for k
     k = (n * k + prior_k * 0.5) / (n + prior_k)
-    sigma = - k / b
+    sigma = -k / b
 
     return k, sigma
 
@@ -442,13 +463,13 @@ def _gpinv(p, k, sigma):
     ok = (p > 0) & (p < 1)
     if np.all(ok):
         if np.abs(k) < np.finfo(float).eps:
-            x = - np.log1p(-p)
+            x = -np.log1p(-p)
         else:
             x = np.expm1(-k * np.log1p(-p)) / k
         x *= sigma
     else:
         if np.abs(k) < np.finfo(float).eps:
-            x[ok] = - np.log1p(-p[ok])
+            x[ok] = -np.log1p(-p[ok])
         else:
             x[ok] = np.expm1(-k * np.log1p(-p[ok])) / k
         x *= sigma
@@ -456,14 +477,21 @@ def _gpinv(p, k, sigma):
         if k >= 0:
             x[p == 1] = np.inf
         else:
-            x[p == 1] = - sigma / k
+            x[p == 1] = -sigma / k
 
     return x
 
 
-def compare(model_dict, ic='WAIC', method='stacking', b_samples=1000,
-            alpha=1, seed=None, round_to=2):
-    R"""Compare models based on the widely available information criterion (WAIC)
+def compare(
+    model_dict,
+    ic="WAIC",
+    method="stacking",
+    b_samples=1000,
+    alpha=1,
+    seed=None,
+    round_to=2,
+):
+    r"""Compare models based on the widely available information criterion (WAIC)
     or leave-one-out (LOO) cross-validation.
     Read more theory here - in a paper by some of the leading authorities on
     model selection - dx.doi.org/10.1111/1467-9868.00353
@@ -528,29 +556,34 @@ def compare(model_dict, ic='WAIC', method='stacking', b_samples=1000,
     if not names:
         names = np.arange(len(model_dict))
 
-    if ic == 'WAIC':
+    if ic == "WAIC":
         ic_func = waic
-        df_comp = pd.DataFrame(index=names,
-                               columns=['WAIC', 'pWAIC', 'dWAIC', 'weight',
-                                        'SE', 'dSE', 'var_warn'])
+        df_comp = pd.DataFrame(
+            index=names,
+            columns=["WAIC", "pWAIC", "dWAIC", "weight", "SE", "dSE", "var_warn"],
+        )
 
-    elif ic == 'LOO':
+    elif ic == "LOO":
         ic_func = loo
-        df_comp = pd.DataFrame(index=names,
-                               columns=['LOO', 'pLOO', 'dLOO', 'weight',
-                                        'SE', 'dSE', 'shape_warn'])
+        df_comp = pd.DataFrame(
+            index=names,
+            columns=["LOO", "pLOO", "dLOO", "weight", "SE", "dSE", "shape_warn"],
+        )
 
     else:
         raise NotImplementedError(
-            'The information criterion {} is not supported.'.format(ic))
+            "The information criterion {} is not supported.".format(ic)
+        )
 
     if len(set([len(m.observed_RVs) for m in model_dict])) != 1:
         raise ValueError(
-            'The number of observed RVs should be the same across all models')
+            "The number of observed RVs should be the same across all models"
+        )
 
-    if method not in ['stacking', 'BB-pseudo-BMA', 'pseudo-BMA']:
-        raise ValueError('The method {}, to compute weights,'
-                         'is not supported.'.format(method))
+    if method not in ["stacking", "BB-pseudo-BMA", "pseudo-BMA"]:
+        raise ValueError(
+            "The method {}, to compute weights, is not supported.".format(method)
+        )
 
     ics = []
     for n, (m, t) in zip(names, model_dict.items()):
@@ -558,17 +591,17 @@ def compare(model_dict, ic='WAIC', method='stacking', b_samples=1000,
 
     ics.sort(key=lambda x: x[1][0])
 
-    if method == 'stacking':
+    if method == "stacking":
         N, K, ic_i = _ic_matrix(ics)
         exp_ic_i = np.exp(-0.5 * ic_i)
         Km = K - 1
 
         def w_fuller(w):
-            return np.concatenate((w, [max(1. - np.sum(w), 0.)]))
+            return np.concatenate((w, [max(1.0 - np.sum(w), 0.0)]))
 
         def log_score(w):
             w_full = w_fuller(w)
-            score = 0.
+            score = 0.0
             for i in range(N):
                 score += np.log(np.dot(exp_ic_i[i], w_full))
             return -score
@@ -578,30 +611,36 @@ def gradient(w):
             grad = np.zeros(Km)
             for k in range(Km):
                 for i in range(N):
-                    grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, Km]) / \
-                        np.dot(exp_ic_i[i], w_full)
+                    grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, Km]) / np.dot(
+                        exp_ic_i[i], w_full
+                    )
             return -grad
 
-        theta = np.full(Km, 1. / K)
-        bounds = [(0., 1.) for i in range(Km)]
-        constraints = [{'type': 'ineq', 'fun': lambda x: -np.sum(x) + 1.},
-                       {'type': 'ineq', 'fun': lambda x: np.sum(x)}]
-
-        w = minimize(fun=log_score,
-                     x0=theta,
-                     jac=gradient,
-                     bounds=bounds,
-                     constraints=constraints)
-
-        weights = w_fuller(w['x'])
+        theta = np.full(Km, 1.0 / K)
+        bounds = [(0.0, 1.0) for i in range(Km)]
+        constraints = [
+            {"type": "ineq", "fun": lambda x: -np.sum(x) + 1.0},
+            {"type": "ineq", "fun": lambda x: np.sum(x)},
+        ]
+
+        w = minimize(
+            fun=log_score,
+            x0=theta,
+            jac=gradient,
+            bounds=bounds,
+            constraints=constraints,
+        )
+
+        weights = w_fuller(w["x"])
         ses = [res[1] for _, res in ics]
 
-    elif method == 'BB-pseudo-BMA':
+    elif method == "BB-pseudo-BMA":
         N, K, ic_i = _ic_matrix(ics)
         ic_i = ic_i * N
 
-        b_weighting = dirichlet.rvs(alpha=[alpha] * N, size=b_samples,
-                                    random_state=seed)
+        b_weighting = dirichlet.rvs(
+            alpha=[alpha] * N, size=b_samples, random_state=seed
+        )
         weights = np.zeros((b_samples, K))
         z_bs = np.zeros_like(weights)
         for i in range(b_samples):
@@ -613,7 +652,7 @@ def gradient(w):
         weights = weights.mean(0)
         ses = z_bs.std(0)
 
-    elif method == 'pseudo-BMA':
+    elif method == "pseudo-BMA":
         min_ic = ics[0][1][0]
         Z = np.sum([np.exp(-0.5 * (x[1][0] - min_ic)) for x in ics])
         weights = []
@@ -629,13 +668,15 @@ def gradient(w):
             d_se = np.sqrt(len(diff) * np.var(diff))
             se = ses[i]
             weight = weights[i]
-            df_comp.at[idx] = (round(res[0], round_to),
-                               round(res[2], round_to),
-                               round(d_ic, round_to),
-                               round(weight, round_to),
-                               round(se, round_to),
-                               round(d_se, round_to),
-                               res[3])
+            df_comp.at[idx] = (
+                round(res[0], round_to),
+                round(res[2], round_to),
+                round(d_ic, round_to),
+                round(weight, round_to),
+                round(se, round_to),
+                round(d_se, round_to),
+                res[3],
+            )
 
         return df_comp.sort_values(by=ic)
 
@@ -651,13 +692,15 @@ def _ic_matrix(ics):
     for i in range(K):
         ic = ics[i][1][4]
         if len(ic) != N:
-            raise ValueError('The number of observations should be the same '
-                             'across all models')
+            raise ValueError(
+                "The number of observations should be the same " "across all models"
+            )
         else:
             ic_i[:, i] = ic
 
     return N, K, ic_i
 
+
 def make_indices(dimensions):
     # Generates complete set of indices for given dimensions
     level = len(dimensions)
@@ -690,7 +733,7 @@ def calc_min_interval(x, alpha):
     interval_width = x[interval_idx_inc:] - x[:n_intervals]
 
     if len(interval_width) == 0:
-        raise ValueError('Too few elements for interval calculation')
+        raise ValueError("Too few elements for interval calculation")
 
     min_idx = np.argmin(interval_width)
     hdi_min = x[min_idx]
@@ -752,14 +795,16 @@ def hpd(x, alpha=0.05, transform=lambda x: x):
 
 
 def _hpd_df(x, alpha):
-    cnames = ['hpd_{0:g}'.format(100 * alpha / 2),
-              'hpd_{0:g}'.format(100 * (1 - alpha / 2))]
+    cnames = [
+        "hpd_{0:g}".format(100 * alpha / 2),
+        "hpd_{0:g}".format(100 * (1 - alpha / 2)),
+    ]
     return pd.DataFrame(hpd(x, alpha), columns=cnames)
 
 
 @statfunc
 def mc_error(x, batches=5):
-    R"""Calculates the simulation standard error, accounting for non-independent
+    r"""Calculates the simulation standard error, accounting for non-independent
         samples. The trace is divided into batches, and the standard deviation of
         the batch means is calculated.
 
@@ -777,7 +822,7 @@ def mc_error(x, batches=5):
     if x.ndim > 1:
 
         dims = np.shape(x)
-        #ttrace = np.transpose(np.reshape(trace, (dims[0], sum(dims[1:]))))
+        # ttrace = np.transpose(np.reshape(trace, (dims[0], sum(dims[1:]))))
         trace = np.transpose([t.ravel() for t in x])
 
         return np.reshape([mc_error(t, batches) for t in trace], dims[1:])
@@ -801,7 +846,7 @@ def mc_error(x, batches=5):
 
 @statfunc
 def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5), transform=lambda x: x):
-    R"""Returns a dictionary of requested quantiles from array
+    r"""Returns a dictionary of requested quantiles from array
 
     Parameters
     ----------
@@ -836,11 +881,13 @@ def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5), transform=lambda x: x):
     except IndexError:
         pm._log.warning("Too few elements for quantile calculation")
 
+
 def dict2pd(statdict, labelname):
     """Small helper function to transform a diagnostics output dict into a
     pandas Series.
     """
     from .backends import tracetab as ttab
+
     var_dfs = []
     for key, value in statdict.items():
         var_df = pd.Series(value.flatten())
@@ -850,10 +897,19 @@ def dict2pd(statdict, labelname):
     statpd = statpd.rename(labelname)
     return statpd
 
-def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None,
-               extend=False, include_transformed=False,
-               alpha=0.05, start=0, batches=None):
-    R"""Create a data frame with summary statistics.
+
+def summary(
+    trace,
+    varnames=None,
+    transform=lambda x: x,
+    stat_funcs=None,
+    extend=False,
+    include_transformed=False,
+    alpha=0.05,
+    start=0,
+    batches=None,
+):
+    r"""Create a data frame with summary statistics.
 
     Parameters
     ----------
@@ -939,16 +995,19 @@ def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None,
     from .backends import tracetab as ttab
 
     if varnames is None:
-        varnames = get_default_varnames(trace.varnames,
-                       include_transformed=include_transformed)
+        varnames = get_default_varnames(
+            trace.varnames, include_transformed=include_transformed
+        )
 
     if batches is None:
         batches = min([100, len(trace)])
 
-    funcs = [lambda x: pd.Series(np.mean(x, 0), name='mean'),
-             lambda x: pd.Series(np.std(x, 0), name='sd'),
-             lambda x: pd.Series(mc_error(x, batches), name='mc_error'),
-             lambda x: _hpd_df(x, alpha)]
+    funcs = [
+        lambda x: pd.Series(np.mean(x, 0), name="mean"),
+        lambda x: pd.Series(np.std(x, 0), name="sd"),
+        lambda x: pd.Series(mc_error(x, batches), name="mc_error"),
+        lambda x: _hpd_df(x, alpha),
+    ]
 
     if stat_funcs is not None:
         if extend:
@@ -970,16 +1029,15 @@ def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None,
     elif trace.nchains < 2:
         return dforg
     else:
-        n_eff = pm.effective_n(trace,
-                               varnames=varnames,
-                               include_transformed=include_transformed)
-        n_eff_pd = dict2pd(n_eff, 'n_eff')
-        rhat = pm.gelman_rubin(trace,
-                               varnames=varnames,
-                               include_transformed=include_transformed)
-        rhat_pd = dict2pd(rhat, 'Rhat')
-        return pd.concat([dforg, n_eff_pd, rhat_pd],
-                         axis=1, join_axes=[dforg.index])
+        n_eff = pm.effective_n(
+            trace, varnames=varnames, include_transformed=include_transformed
+        )
+        n_eff_pd = dict2pd(n_eff, "n_eff")
+        rhat = pm.gelman_rubin(
+            trace, varnames=varnames, include_transformed=include_transformed
+        )
+        rhat_pd = dict2pd(rhat, "Rhat")
+        return pd.concat([dforg, n_eff_pd, rhat_pd], axis=1).reindex(dforg.index)
 
 
 def _calculate_stats(sample, batches, alpha):
@@ -992,14 +1050,14 @@ def _calculate_stats(sample, batches, alpha):
         for idx in idxs:
             mean, sd, mce = [stat[idx] for stat in (means, sds, mces)]
             interval = intervals[idx].squeeze().tolist()
-            yield {'mean': mean, 'sd': sd, 'mce': mce, 'hpd': interval}
+            yield {"mean": mean, "sd": sd, "mce": mce, "hpd": interval}
 
 
 def _calculate_posterior_quantiles(sample, qlist):
     var_quantiles = quantiles(sample, qlist=qlist)
     # Replace ends of qlist with 'lo' and 'hi'
-    qends = {qlist[0]: 'lo', qlist[-1]: 'hi'}
-    qkeys = {q: qends[q] if q in qends else 'q{}'.format(q) for q in qlist}
+    qends = {qlist[0]: "lo", qlist[-1]: "hi"}
+    qkeys = {q: qends[q] if q in qends else "q{}".format(q) for q in qlist}
     for key, idxs in _groupby_leading_idxs(sample.shape[1:]):
         yield key
         for idx in idxs:
@@ -1044,7 +1102,7 @@ def _groupby_leading_idxs(shape):
 
 
 def bfmi(trace):
-    R"""Calculate the estimated Bayesian fraction of missing information (BFMI).
+    r"""Calculate the estimated Bayesian fraction of missing information (BFMI).
 
     BFMI quantifies how well momentum resampling matches the marginal energy
     distribution.  For more information on BFMI, see
@@ -1063,13 +1121,13 @@ def bfmi(trace):
     z : float
         The Bayesian fraction of missing information of the model and trace.
     """
-    energy = trace['energy']
+    energy = trace["energy"]
 
     return np.square(np.diff(energy)).mean() / np.var(energy)
 
 
 def r2_score(y_true, y_pred, round_to=2):
-    R"""R-squared for Bayesian regression models. Only valid for linear models.
+    r"""R-squared for Bayesian regression models. Only valid for linear models.
     http://www.stat.columbia.edu/%7Egelman/research/unpublished/bayes_R2.pdf
 
     Parameters
@@ -1099,6 +1157,5 @@ def r2_score(y_true, y_pred, round_to=2):
     r2_median = np.around(np.median(r2), round_to)
     r2_mean = np.around(np.mean(r2), round_to)
     r2_std = np.around(np.std(r2), round_to)
-    r2_r = namedtuple('r2_r', 'r2_median, r2_mean, r2_std')
+    r2_r = namedtuple("r2_r", "r2_median, r2_mean, r2_std")
     return r2_r(r2_median, r2_mean, r2_std)
-
diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py
index 8366bbb5c3..413887f98e 100644
--- a/pymc3/step_methods/arraystep.py
+++ b/pymc3/step_methods/arraystep.py
@@ -6,8 +6,7 @@
 from numpy.random import uniform
 from enum import IntEnum, unique
 
-__all__ = [
-    'ArrayStep', 'ArrayStepShared', 'metrop_select', 'Competence']
+__all__ = ["ArrayStep", "ArrayStepShared", "metrop_select", "Competence"]
 
 
 @unique
@@ -19,6 +18,7 @@ class Competence(IntEnum):
     2: PREFERRED
     3: IDEAL
     """
+
     INCOMPATIBLE = 0
     COMPATIBLE = 1
     PREFERRED = 2
@@ -30,21 +30,21 @@ class BlockedStep(object):
     generates_stats = False
 
     def __new__(cls, *args, **kwargs):
-        blocked = kwargs.get('blocked')
+        blocked = kwargs.get("blocked")
         if blocked is None:
             # Try to look up default value from class
-            blocked = getattr(cls, 'default_blocked', True)
-            kwargs['blocked'] = blocked
+            blocked = getattr(cls, "default_blocked", True)
+            kwargs["blocked"] = blocked
 
-        model = modelcontext(kwargs.get('model'))
-        kwargs.update({'model':model})
+        model = modelcontext(kwargs.get("model"))
+        kwargs.update({"model": model})
 
         # vars can either be first arg or a kwarg
-        if 'vars' not in kwargs and len(args) >= 1:
+        if "vars" not in kwargs and len(args) >= 1:
             vars = args[0]
             args = args[1:]
-        elif 'vars' in kwargs:
-            vars = kwargs.pop('vars')
+        elif "vars" in kwargs:
+            vars = kwargs.pop("vars")
         else:  # Assume all model variables
             vars = model.vars
 
@@ -52,7 +52,7 @@ def __new__(cls, *args, **kwargs):
         vars = inputvars(vars)
 
         if len(vars) == 0:
-            raise ValueError('No free random variables to sample.')
+            raise ValueError("No free random variables to sample.")
 
         if not blocked and len(vars) > 1:
             # In this case we create a separate sampler for each var
@@ -64,14 +64,14 @@ def __new__(cls, *args, **kwargs):
                 # call __init__
                 step.__init__([var], *args, **kwargs)
                 # Hack for creating the class correctly when unpickling.
-                step.__newargs = ([var], ) + args, kwargs
+                step.__newargs = ([var],) + args, kwargs
                 steps.append(step)
 
             return CompoundStep(steps)
         else:
             step = super(BlockedStep, cls).__new__(cls)
             # Hack for creating the class correctly when unpickling.
-            step.__newargs = (vars, ) + args, kwargs
+            step.__newargs = (vars,) + args, kwargs
             return step
 
     # Hack for creating the class correctly when unpickling.
@@ -104,7 +104,7 @@ def vars_shape_dtype(self):
         return shape_dtypes
 
     def stop_tuning(self):
-        if hasattr(self, 'tune'):
+        if hasattr(self, "tune"):
             self.tune = False
 
 
@@ -212,20 +212,22 @@ def link_population(self, population, chain_index):
         self.this_chain = chain_index
         self.other_chains = [c for c in range(len(population)) if c != chain_index]
         if not len(self.other_chains) > 1:
-            raise ValueError('Population is just {} + {}. This is too small. You should ' \
-                'increase the number of chains.'.format(self.this_chain, self.other_chains))
+            raise ValueError(
+                "Population is just {} + {}. This is too small. You should "
+                "increase the number of chains.".format(
+                    self.this_chain, self.other_chains
+                )
+            )
         return
 
 
 class GradientSharedStep(BlockedStep):
-    def __init__(self, vars, model=None, blocked=True,
-                 dtype=None, **theano_kwargs):
+    def __init__(self, vars, model=None, blocked=True, dtype=None, **theano_kwargs):
         model = modelcontext(model)
         self.vars = vars
         self.blocked = blocked
 
-        func = model.logp_dlogp_function(
-            vars, dtype=dtype, **theano_kwargs)
+        func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs)
 
         # handle edge case discovered in #2948
         try:
@@ -233,9 +235,8 @@ def __init__(self, vars, model=None, blocked=True,
             q = func.dict_to_array(model.test_point)
             logp, dlogp = func(q)
         except ValueError:
-            theano_kwargs.update(mode='FAST_COMPILE')
-            func = model.logp_dlogp_function(
-                vars, dtype=dtype, **theano_kwargs)
+            theano_kwargs.update(mode="FAST_COMPILE")
+            func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs)
 
         self._logp_dlogp_func = func
 
diff --git a/pymc3/step_methods/compound.py b/pymc3/step_methods/compound.py
index 8deb0555fd..fead664e5d 100644
--- a/pymc3/step_methods/compound.py
+++ b/pymc3/step_methods/compound.py
@@ -1,8 +1,8 @@
-'''
+"""
 Created on Mar 7, 2011
 
 @author: johnsalvatier
-'''
+"""
 import numpy as np
 
 
@@ -12,8 +12,7 @@ class CompoundStep(object):
 
     def __init__(self, methods):
         self.methods = list(methods)
-        self.generates_stats = any(
-            method.generates_stats for method in self.methods)
+        self.generates_stats = any(method.generates_stats for method in self.methods)
         self.stats_dtypes = []
         for method in self.methods:
             if method.generates_stats:
@@ -32,7 +31,7 @@ def step(self, point):
             # one. Pop all others (if dict), or set to np.nan (if namedtuple).
             for state in states[:-1]:
                 if isinstance(state, dict):
-                    state.pop('model_logp', None)
+                    state.pop("model_logp", None)
                 elif isinstance(state, namedtuple):
                     state = state._replace(logp=np.nan)
             return point, states
@@ -44,7 +43,7 @@ def step(self, point):
     def warnings(self):
         warns = []
         for method in self.methods:
-            if hasattr(method, 'warnings'):
+            if hasattr(method, "warnings"):
                 warns.extend(method.warnings())
         return warns
 
diff --git a/pymc3/step_methods/elliptical_slice.py b/pymc3/step_methods/elliptical_slice.py
index 5936c554c0..cd4cf47fdd 100644
--- a/pymc3/step_methods/elliptical_slice.py
+++ b/pymc3/step_methods/elliptical_slice.py
@@ -7,7 +7,7 @@
 from ..theanof import inputvars
 from ..distributions import draw_values
 
-__all__ = ['EllipticalSlice']
+__all__ = ["EllipticalSlice"]
 
 
 def get_chol(cov, chol):
@@ -27,7 +27,7 @@ def get_chol(cov, chol):
     """
 
     if len([i for i in [cov, chol] if i is not None]) != 1:
-        raise ValueError('Must pass exactly one of cov or chol')
+        raise ValueError("Must pass exactly one of cov or chol")
 
     if cov is not None:
         chol = tt.slinalg.cholesky(cov)
@@ -69,8 +69,9 @@ class EllipticalSlice(ArrayStep):
 
     default_blocked = True
 
-    def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None,
-                 **kwargs):
+    def __init__(
+        self, vars=None, prior_cov=None, prior_chol=None, model=None, **kwargs
+    ):
         self.model = modelcontext(model)
         chol = get_chol(prior_cov, prior_chol)
         self.prior_chol = tt.as_tensor_variable(chol)
diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py
index 92fc6b02a1..5f680c157f 100644
--- a/pymc3/step_methods/gibbs.py
+++ b/pymc3/step_methods/gibbs.py
@@ -1,18 +1,29 @@
-'''
+"""
 Created on May 12, 2012
 
 @author: john
-'''
+"""
 from .arraystep import ArrayStep, Competence
 from ..distributions.discrete import Categorical
-from numpy import array, max, exp, cumsum, nested_iters, empty, searchsorted, ones, arange
+from numpy import (
+    array,
+    max,
+    exp,
+    cumsum,
+    nested_iters,
+    empty,
+    searchsorted,
+    ones,
+    arange,
+)
 from numpy.random import uniform
 from warnings import warn
 
 from theano.gof.graph import inputs
 from theano.tensor import add
 from ..model import modelcontext
-__all__ = ['ElemwiseCategorical']
+
+__all__ = ["ElemwiseCategorical"]
 
 
 class ElemwiseCategorical(ArrayStep):
@@ -21,13 +32,17 @@ class ElemwiseCategorical(ArrayStep):
     the variable can't be indexed into or transposed or anything otherwise that will mess things up
 
     """
+
     # TODO: It would be great to come up with a way to make
     # ElemwiseCategorical  more general (handling more complex elementwise
     # variables)
 
     def __init__(self, vars, values=None, model=None):
-        warn('ElemwiseCategorical is deprecated, switch to CategoricalGibbsMetropolis.',
-             DeprecationWarning, stacklevel = 2)
+        warn(
+            "ElemwiseCategorical is deprecated, switch to CategoricalGibbsMetropolis.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         model = modelcontext(model)
         self.var = vars[0]
         self.sh = ones(self.var.dshape, self.var.dtype)
@@ -37,7 +52,8 @@ def __init__(self, vars, values=None, model=None):
             self.values = values
 
         super(ElemwiseCategorical, self).__init__(
-            vars, [elemwise_logp(model, self.var)])
+            vars, [elemwise_logp(model, self.var)]
+        )
 
     def astep(self, q, logp):
         p = array([logp(v * self.sh) for v in self.values])
@@ -51,8 +67,7 @@ def competence(var, has_grad):
 
 
 def elemwise_logp(model, var):
-    terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([
-                                                                        v.logpt])]
+    terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([v.logpt])]
     return model.fn(add(*terms))
 
 
@@ -60,9 +75,12 @@ def categorical(prob, shape):
     out = empty([1] + list(shape))
 
     n = len(shape)
-    it0, it1 = nested_iters([prob, out], [list(range(1, n + 1)), [0]],
-                            op_flags=[['readonly'], ['readwrite']],
-                            flags=['reduce_ok'])
+    it0, it1 = nested_iters(
+        [prob, out],
+        [list(range(1, n + 1)), [0]],
+        op_flags=[["readonly"], ["readwrite"]],
+        flags=["reduce_ok"],
+    )
 
     for _ in it0:
         p, o = it1.itviews
diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py
index 96ad165888..0eecfbc391 100644
--- a/pymc3/step_methods/hmc/base_hmc.py
+++ b/pymc3/step_methods/hmc/base_hmc.py
@@ -10,16 +10,13 @@
 from .quadpotential import quad_potential, QuadPotentialDiagAdapt
 from pymc3.step_methods import step_sizes
 from pymc3.backends.report import SamplerWarning, WarningType
-logger = logging.getLogger('pymc3')
 
-HMCStepData = namedtuple(
-    "HMCStepData",
-    "end, accept_stat, divergence_info, stats")
+logger = logging.getLogger("pymc3")
 
+HMCStepData = namedtuple("HMCStepData", "end, accept_stat, divergence_info, stats")
 
-DivergenceInfo = namedtuple(
-    'DivergenceInfo',
-    'message, exec_info, state')
+
+DivergenceInfo = namedtuple("DivergenceInfo", "message, exec_info, state")
 
 
 class BaseHMC(arraystep.GradientSharedStep):
@@ -27,12 +24,26 @@ class BaseHMC(arraystep.GradientSharedStep):
 
     default_blocked = True
 
-    def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
-                 model=None, blocked=True, potential=None,
-                 integrator="leapfrog", dtype=None, Emax=1000,
-                 target_accept=0.8, gamma=0.05, k=0.75, t0=10,
-                 adapt_step_size=True, step_rand=None,
-                 **theano_kwargs):
+    def __init__(
+        self,
+        vars=None,
+        scaling=None,
+        step_scale=0.25,
+        is_cov=False,
+        model=None,
+        blocked=True,
+        potential=None,
+        integrator="leapfrog",
+        dtype=None,
+        Emax=1000,
+        target_accept=0.8,
+        gamma=0.05,
+        k=0.75,
+        t0=10,
+        adapt_step_size=True,
+        step_rand=None,
+        **theano_kwargs
+    ):
         """Set up Hamiltonian samplers with common structures.
 
         Parameters
@@ -59,8 +70,9 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             vars = self._model.cont_vars
         vars = inputvars(vars)
 
-        super(BaseHMC, self).__init__(vars, blocked=blocked, model=model,
-                                      dtype=dtype, **theano_kwargs)
+        super(BaseHMC, self).__init__(
+            vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs
+        )
 
         self.adapt_step_size = adapt_step_size
         self.Emax = Emax
@@ -70,7 +82,8 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
         self.step_size = step_scale / (size ** 0.25)
         self.target_accept = target_accept
         self.step_adapt = step_sizes.DualAverageAdaptation(
-            self.step_size, target_accept, gamma, k, t0)
+            self.step_size, target_accept, gamma, k, t0
+        )
 
         self.tune = True
 
@@ -92,7 +105,8 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             self.potential = quad_potential(scaling, is_cov)
 
         self.integrator = integration.CpuLeapfrogIntegrator(
-            self.potential, self._logp_dlogp_func)
+            self.potential, self._logp_dlogp_func
+        )
 
         self._step_rand = step_rand
         self._warnings = []
@@ -111,13 +125,19 @@ def astep(self, q0):
         p0 = self.potential.random()
         start = self.integrator.compute_state(q0, p0)
         model = self._model
-    
+
         if not np.isfinite(start.energy):
             check_test_point = model.check_test_point()
-            error_logp = check_test_point.loc[(np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point)]
+            error_logp = check_test_point.loc[
+                (np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point)
+            ]
             self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
-            logger.error("Bad initial energy, check any log probabilities that are inf or -inf, nan or very small:\n{}".format(error_logp.to_string()))
-            raise ValueError('Bad initial energy')
+            logger.error(
+                "Bad initial energy, check any log probabilities that are inf or -inf, nan or very small:\n{}".format(
+                    error_logp.to_string()
+                )
+            )
+            raise ValueError("Bad initial energy")
 
         adapt_step = self.tune and self.adapt_step_size
         step_size = self.step_adapt.current(adapt_step)
@@ -144,8 +164,8 @@ def astep(self, q0):
                 else:
                     point = None
             warning = SamplerWarning(
-                kind, info.message, 'debug', self.iter_count,
-                info.exec_info, point)
+                kind, info.message, "debug", self.iter_count, info.exec_info, point
+            )
 
             self._warnings.append(warning)
 
@@ -153,10 +173,7 @@ def astep(self, q0):
         if not self.tune:
             self._samples_after_tune += 1
 
-        stats = {
-            'tune': self.tune,
-            'diverging': bool(hmc_step.divergence_info),
-        }
+        stats = {"tune": self.tune, "diverging": bool(hmc_step.divergence_info)}
 
         stats.update(hmc_step.stats)
         stats.update(self.step_adapt.stats())
@@ -172,21 +189,28 @@ def warnings(self):
         warnings = self._warnings[:]
 
         # Generate a global warning for divergences
-        message = ''
+        message = ""
         n_divs = self._num_divs_sample
         if n_divs and self._samples_after_tune == n_divs:
-            message = ('The chain contains only diverging samples. The model '
-                       'is probably misspecified.')
+            message = (
+                "The chain contains only diverging samples. The model "
+                "is probably misspecified."
+            )
         elif n_divs == 1:
-            message = ('There was 1 divergence after tuning. Increase '
-                       '`target_accept` or reparameterize.')
+            message = (
+                "There was 1 divergence after tuning. Increase "
+                "`target_accept` or reparameterize."
+            )
         elif n_divs > 1:
-            message = ('There were %s divergences after tuning. Increase '
-                       '`target_accept` or reparameterize.' % n_divs)
+            message = (
+                "There were %s divergences after tuning. Increase "
+                "`target_accept` or reparameterize." % n_divs
+            )
 
         if message:
             warning = SamplerWarning(
-                WarningType.DIVERGENCES, message, 'error', None, None, None)
+                WarningType.DIVERGENCES, message, "error", None, None, None
+            )
             warnings.append(warning)
 
         warnings.extend(self.step_adapt.warnings())
diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py
index e918eca43c..302c312b08 100644
--- a/pymc3/step_methods/hmc/hmc.py
+++ b/pymc3/step_methods/hmc/hmc.py
@@ -6,40 +6,50 @@
 from pymc3.step_methods.hmc.base_hmc import BaseHMC, HMCStepData, DivergenceInfo
 
 
-__all__ = ['HamiltonianMC']
+__all__ = ["HamiltonianMC"]
 
 
-def unif(step_size, elow=.85, ehigh=1.15):
+def unif(step_size, elow=0.85, ehigh=1.15):
     return np.random.uniform(elow, ehigh) * step_size
 
 
 class HamiltonianMC(BaseHMC):
-    R"""A sampler for continuous variables based on Hamiltonian mechanics.
+    r"""A sampler for continuous variables based on Hamiltonian mechanics.
 
     See NUTS sampler for automatically tuned stopping time and step size scaling.
     """
 
-    name = 'hmc'
+    name = "hmc"
     default_blocked = True
     generates_stats = True
-    stats_dtypes = [{
-        'step_size': np.float64,
-        'n_steps': np.int64,
-        'tune': np.bool,
-        'step_size_bar': np.float64,
-        'accept': np.float64,
-        'diverging': np.bool,
-        'energy_error': np.float64,
-        'energy': np.float64,
-        'max_energy_error': np.float64,
-        'path_length': np.float64,
-        'accepted': np.bool,
-        'model_logp': np.float64,
-    }]
-
-    def __init__(self, vars=None, path_length=2.,
-                 adapt_step_size=True, gamma=0.05, k=0.75, t0=10,
-                 target_accept=0.8, **kwargs):
+    stats_dtypes = [
+        {
+            "step_size": np.float64,
+            "n_steps": np.int64,
+            "tune": np.bool,
+            "step_size_bar": np.float64,
+            "accept": np.float64,
+            "diverging": np.bool,
+            "energy_error": np.float64,
+            "energy": np.float64,
+            "max_energy_error": np.float64,
+            "path_length": np.float64,
+            "accepted": np.bool,
+            "model_logp": np.float64,
+        }
+    ]
+
+    def __init__(
+        self,
+        vars=None,
+        path_length=2.0,
+        adapt_step_size=True,
+        gamma=0.05,
+        k=0.75,
+        t0=10,
+        target_accept=0.8,
+        **kwargs
+    ):
         """Set up the Hamiltonian Monte Carlo sampler.
 
         Parameters
@@ -99,16 +109,17 @@ def _hamiltonian_step(self, start, p0, step_size):
             for _ in range(n_steps):
                 state = self.integrator.step(step_size, state)
         except IntegrationError as e:
-            div_info = DivergenceInfo('Divergence encountered.', e, state)
+            div_info = DivergenceInfo("Divergence encountered.", e, state)
         else:
             if not np.isfinite(state.energy):
                 div_info = DivergenceInfo(
-                    'Divergence encountered, bad energy.', None, state)
+                    "Divergence encountered, bad energy.", None, state
+                )
             energy_change = start.energy - state.energy
             if np.abs(energy_change) > self.Emax:
                 div_info = DivergenceInfo(
-                    'Divergence encountered, large integration error.',
-                    None, state)
+                    "Divergence encountered, large integration error.", None, state
+                )
 
         accept_stat = min(1, np.exp(energy_change))
 
@@ -120,13 +131,13 @@ def _hamiltonian_step(self, start, p0, step_size):
             accepted = True
 
         stats = {
-            'path_length': path_length,
-            'n_steps': n_steps,
-            'accept': accept_stat,
-            'energy_error': energy_change,
-            'energy': state.energy,
-            'accepted': accepted,
-            'model_logp': state.model_logp,
+            "path_length": path_length,
+            "n_steps": n_steps,
+            "accept": accept_stat,
+            "energy_error": energy_change,
+            "energy": state.energy,
+            "accepted": accepted,
+            "model_logp": state.model_logp,
         }
         return HMCStepData(end, accept_stat, div_info, stats)
 
diff --git a/pymc3/step_methods/hmc/integration.py b/pymc3/step_methods/hmc/integration.py
index f64e876dcc..1ee9905201 100644
--- a/pymc3/step_methods/hmc/integration.py
+++ b/pymc3/step_methods/hmc/integration.py
@@ -4,7 +4,7 @@
 from scipy import linalg
 
 
-State = namedtuple("State", 'q, p, v, q_grad, energy, model_logp')
+State = namedtuple("State", "q, p, v, q_grad, energy, model_logp")
 
 
 class IntegrationError(RuntimeError):
@@ -18,14 +18,15 @@ def __init__(self, potential, logp_dlogp_func):
         self._logp_dlogp_func = logp_dlogp_func
         self._dtype = self._logp_dlogp_func.dtype
         if self._potential.dtype != self._dtype:
-            raise ValueError("dtypes of potential (%s) and logp function (%s)"
-                             "don't match."
-                             % (self._potential.dtype, self._dtype))
+            raise ValueError(
+                "dtypes of potential (%s) and logp function (%s)"
+                "don't match." % (self._potential.dtype, self._dtype)
+            )
 
     def compute_state(self, q, p):
         """Compute Hamiltonian functions using a position and momentum."""
         if q.dtype != self._dtype or p.dtype != self._dtype:
-            raise ValueError('Invalid dtype. Must be %s' % self._dtype)
+            raise ValueError("Invalid dtype. Must be %s" % self._dtype)
         logp, dlogp = self._logp_dlogp_func(q)
         v = self._potential.velocity(p)
         kinetic = self._potential.energy(p, velocity=v)
@@ -66,7 +67,7 @@ def step(self, epsilon, state, out=None):
 
     def _step(self, epsilon, state, out=None):
         pot = self._potential
-        axpy = linalg.blas.get_blas_funcs('axpy', dtype=self._dtype)
+        axpy = linalg.blas.get_blas_funcs("axpy", dtype=self._dtype)
 
         q, p, v, q_grad, energy, logp = state
         if out is None:
diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
index 92fbe10f99..226e865116 100644
--- a/pymc3/step_methods/hmc/nuts.py
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -12,7 +12,7 @@
 from pymc3.theanof import floatX
 from pymc3.vartypes import continuous_types
 
-__all__ = ['NUTS']
+__all__ = ["NUTS"]
 
 
 def logbern(log_p):
@@ -22,7 +22,7 @@ def logbern(log_p):
 
 
 class NUTS(BaseHMC):
-    R"""A sampler for continuous variables based on Hamiltonian mechanics.
+    r"""A sampler for continuous variables based on Hamiltonian mechanics.
 
     NUTS automatically tunes the step size and the number of steps per
     sample. A detailed description can be found at [1], "Algorithm 6:
@@ -72,27 +72,28 @@ class NUTS(BaseHMC):
        Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.
     """
 
-    name = 'nuts'
+    name = "nuts"
 
     default_blocked = True
     generates_stats = True
-    stats_dtypes = [{
-        'depth': np.int64,
-        'step_size': np.float64,
-        'tune': np.bool,
-        'mean_tree_accept': np.float64,
-        'step_size_bar': np.float64,
-        'tree_size': np.float64,
-        'diverging': np.bool,
-        'energy_error': np.float64,
-        'energy': np.float64,
-        'max_energy_error': np.float64,
-        'model_logp': np.float64,
-    }]
-
-    def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8,
-                 **kwargs):
-        R"""Set up the No-U-Turn sampler.
+    stats_dtypes = [
+        {
+            "depth": np.int64,
+            "step_size": np.float64,
+            "tune": np.bool,
+            "mean_tree_accept": np.float64,
+            "step_size_bar": np.float64,
+            "tree_size": np.float64,
+            "diverging": np.bool,
+            "energy_error": np.float64,
+            "energy": np.float64,
+            "max_energy_error": np.float64,
+            "model_logp": np.float64,
+        }
+    ]
+
+    def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs):
+        r"""Set up the No-U-Turn sampler.
 
         Parameters
         ----------
@@ -176,7 +177,7 @@ def _hamiltonian_step(self, start, p0, step_size):
                 self._reached_max_treedepth += 1
 
         stats = tree.stats()
-        accept_stat = stats['mean_tree_accept']
+        accept_stat = stats["mean_tree_accept"]
         return HMCStepData(tree.proposal, accept_stat, divergence_info, stats)
 
     @staticmethod
@@ -192,10 +193,11 @@ def warnings(self):
         n_treedepth = self._reached_max_treedepth
 
         if n_samples > 0 and n_treedepth / float(n_samples) > 0.05:
-            msg = ('The chain reached the maximum tree depth. Increase '
-                   'max_treedepth, increase target_accept or reparameterize.')
-            warn = SamplerWarning(WarningType.TREEDEPTH, msg, 'warn',
-                                  None, None, None)
+            msg = (
+                "The chain reached the maximum tree depth. Increase "
+                "max_treedepth, increase target_accept or reparameterize."
+            )
+            warn = SamplerWarning(WarningType.TREEDEPTH, msg, "warn", None, None, None)
             warnings.append(warn)
         return warnings
 
@@ -205,8 +207,8 @@ def warnings(self):
 
 # A subtree of the binary tree built by nuts.
 Subtree = namedtuple(
-    "Subtree",
-    "left, right, p_sum, proposal, log_size, accept_sum, n_proposals")
+    "Subtree", "left, right, p_sum, proposal, log_size, accept_sum, n_proposals"
+)
 
 
 class _Tree(object):
@@ -234,7 +236,8 @@ def __init__(self, ndim, integrator, start, step_size, Emax):
 
         self.left = self.right = start
         self.proposal = Proposal(
-            start.q, start.q_grad, start.energy, 1.0, start.model_logp)
+            start.q, start.q_grad, start.energy, 1.0, start.model_logp
+        )
         self.depth = 0
         self.log_size = 0
         self.accept_sum = 0
@@ -256,11 +259,13 @@ def extend(self, direction):
         """
         if direction > 0:
             tree, diverging, turning = self._build_subtree(
-                self.right, self.depth, floatX(np.asarray(self.step_size)))
+                self.right, self.depth, floatX(np.asarray(self.step_size))
+            )
             self.right = tree.right
         else:
             tree, diverging, turning = self._build_subtree(
-                self.left, self.depth, floatX(np.asarray(-self.step_size)))
+                self.left, self.depth, floatX(np.asarray(-self.step_size))
+            )
             self.left = tree.right
 
         self.depth += 1
@@ -301,13 +306,14 @@ def _single_step(self, left, epsilon):
                 p_accept = min(1, np.exp(-energy_change))
                 log_size = -energy_change
                 proposal = Proposal(
-                    right.q, right.q_grad, right.energy, p_accept, right.model_logp)
-                tree = Subtree(right, right, right.p,
-                               proposal, log_size, p_accept, 1)
+                    right.q, right.q_grad, right.energy, p_accept, right.model_logp
+                )
+                tree = Subtree(right, right, right.p, proposal, log_size, p_accept, 1)
                 return tree, None, False
             else:
-                error_msg = ("Energy change in leapfrog step is too large: %s."
-                             % energy_change)
+                error_msg = (
+                    "Energy change in leapfrog step is too large: %s." % energy_change
+                )
                 error = None
         tree = Subtree(None, None, None, None, -np.inf, 0, 1)
         divergance_info = DivergenceInfo(error_msg, error, left)
@@ -317,13 +323,11 @@ def _build_subtree(self, left, depth, epsilon):
         if depth == 0:
             return self._single_step(left, epsilon)
 
-        tree1, diverging, turning = self._build_subtree(
-            left, depth - 1, epsilon)
+        tree1, diverging, turning = self._build_subtree(left, depth - 1, epsilon)
         if diverging or turning:
             return tree1, diverging, turning
 
-        tree2, diverging, turning = self._build_subtree(
-            tree1.right, depth - 1, epsilon)
+        tree2, diverging, turning = self._build_subtree(tree1.right, depth - 1, epsilon)
 
         left, right = tree1.left, tree2.right
 
@@ -344,17 +348,16 @@ def _build_subtree(self, left, depth, epsilon):
         accept_sum = tree1.accept_sum + tree2.accept_sum
         n_proposals = tree1.n_proposals + tree2.n_proposals
 
-        tree = Subtree(left, right, p_sum, proposal,
-                       log_size, accept_sum, n_proposals)
+        tree = Subtree(left, right, p_sum, proposal, log_size, accept_sum, n_proposals)
         return tree, diverging, turning
 
     def stats(self):
         return {
-            'depth': self.depth,
-            'mean_tree_accept': self.accept_sum / self.n_proposals,
-            'energy_error': self.proposal.energy - self.start.energy,
-            'energy': self.proposal.energy,
-            'tree_size': self.n_proposals,
-            'max_energy_error': self.max_energy_change,
-            'model_logp': self.proposal.logp,
+            "depth": self.depth,
+            "mean_tree_accept": self.accept_sum / self.n_proposals,
+            "energy_error": self.proposal.energy - self.start.energy,
+            "energy": self.proposal.energy,
+            "tree_size": self.n_proposals,
+            "max_energy_error": self.max_energy_change,
+            "model_logp": self.proposal.logp,
         }
diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py
index 6c67a8f1c3..1142948530 100644
--- a/pymc3/step_methods/hmc/quadpotential.py
+++ b/pymc3/step_methods/hmc/quadpotential.py
@@ -7,8 +7,14 @@
 from pymc3.theanof import floatX
 
 
-__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull',
-           'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', 'isquadpotential']
+__all__ = [
+    "quad_potential",
+    "QuadPotentialDiag",
+    "QuadPotentialFull",
+    "QuadPotentialFullInv",
+    "QuadPotentialDiagAdapt",
+    "isquadpotential",
+]
 
 
 def quad_potential(C, is_cov):
@@ -40,7 +46,7 @@ def quad_potential(C, is_cov):
         if is_cov:
             return QuadPotentialDiag(C)
         else:
-            return QuadPotentialDiag(1. / C)
+            return QuadPotentialDiag(1.0 / C)
     else:
         if is_cov:
             return QuadPotentialFull(C)
@@ -58,7 +64,8 @@ def partial_check_positive_definite(C):
 
     if len(i):
         raise PositiveDefiniteError(
-            "Simple check failed. Diagonal contains negatives", i)
+            "Simple check failed. Diagonal contains negatives", i
+        )
 
 
 class PositiveDefiniteError(ValueError):
@@ -68,23 +75,25 @@ def __init__(self, msg, idx):
         self.msg = msg
 
     def __str__(self):
-        return ("Scaling is not positive definite: %s. Check indexes %s."
-                % (self.msg, self.idx))
+        return "Scaling is not positive definite: %s. Check indexes %s." % (
+            self.msg,
+            self.idx,
+        )
 
 
 class QuadPotential(object):
     def velocity(self, x, out=None):
         """Compute the current velocity at a position in parameter space."""
-        raise NotImplementedError('Abstract method')
+        raise NotImplementedError("Abstract method")
 
     def energy(self, x, velocity=None):
-        raise NotImplementedError('Abstract method')
+        raise NotImplementedError("Abstract method")
 
     def random(self, x):
-        raise NotImplementedError('Abstract method')
+        raise NotImplementedError("Abstract method")
 
     def velocity_energy(self, x, v_out):
-        raise NotImplementedError('Abstract method')
+        raise NotImplementedError("Abstract method")
 
     def update(self, sample, grad, tune):
         """Inform the potential about a new sample during tuning.
@@ -124,19 +133,30 @@ def isquadpotential(value):
 class QuadPotentialDiagAdapt(QuadPotential):
     """Adapt a diagonal mass matrix from the sample variances."""
 
-    def __init__(self, n, initial_mean, initial_diag=None, initial_weight=0,
-                 adaptation_window=101, dtype=None):
+    def __init__(
+        self,
+        n,
+        initial_mean,
+        initial_diag=None,
+        initial_weight=0,
+        adaptation_window=101,
+        dtype=None,
+    ):
         """Set up a diagonal mass matrix."""
         if initial_diag is not None and initial_diag.ndim != 1:
-            raise ValueError('Initial diagonal must be one-dimensional.')
+            raise ValueError("Initial diagonal must be one-dimensional.")
         if initial_mean.ndim != 1:
-            raise ValueError('Initial mean must be one-dimensional.')
+            raise ValueError("Initial mean must be one-dimensional.")
         if initial_diag is not None and len(initial_diag) != n:
-            raise ValueError('Wrong shape for initial_diag: expected %s got %s'
-                             % (n, len(initial_diag)))
+            raise ValueError(
+                "Wrong shape for initial_diag: expected %s got %s"
+                % (n, len(initial_diag))
+            )
         if len(initial_mean) != n:
-            raise ValueError('Wrong shape for initial_mean: expected %s got %s'
-                             % (n, len(initial_mean)))
+            raise ValueError(
+                "Wrong shape for initial_mean: expected %s got %s"
+                % (n, len(initial_mean))
+            )
 
         if dtype is None:
             dtype = theano.config.floatX
@@ -150,9 +170,10 @@ def __init__(self, n, initial_mean, initial_diag=None, initial_weight=0,
         self._var = np.array(initial_diag, dtype=self.dtype, copy=True)
         self._var_theano = theano.shared(self._var)
         self._stds = np.sqrt(initial_diag)
-        self._inv_stds = floatX(1.) / self._stds
+        self._inv_stds = floatX(1.0) / self._stds
         self._foreground_var = _WeightedVariance(
-            self._n, initial_mean, initial_diag, initial_weight, self.dtype)
+            self._n, initial_mean, initial_diag, initial_weight, self.dtype
+        )
         self._background_var = _WeightedVariance(self._n, dtype=self.dtype)
         self._n_samples = 0
         self.adaptation_window = adaptation_window
@@ -224,11 +245,13 @@ def raise_ok(self, vmap):
                 for i in range(slclen):
                     name_slc.append((vmap_.var, i))
             index = np.where(self._stds == 0)[0]
-            errmsg = ['Mass matrix contains zeros on the diagonal. ']
+            errmsg = ["Mass matrix contains zeros on the diagonal. "]
             for ii in index:
-                errmsg.append('The derivative of RV `{}`.ravel()[{}]'
-                              ' is zero.'.format(*name_slc[ii]))
-            raise ValueError('\n'.join(errmsg))
+                errmsg.append(
+                    "The derivative of RV `{}`.ravel()[{}]"
+                    " is zero.".format(*name_slc[ii])
+                )
+            raise ValueError("\n".join(errmsg))
 
         if np.any(~np.isfinite(self._stds)):
             name_slc = []
@@ -238,11 +261,13 @@ def raise_ok(self, vmap):
                 for i in range(slclen):
                     name_slc.append((vmap_.var, i))
             index = np.where(~np.isfinite(self._stds))[0]
-            errmsg = ['Mass matrix contains non-finite values on the diagonal. ']
+            errmsg = ["Mass matrix contains non-finite values on the diagonal. "]
             for ii in index:
-                errmsg.append('The derivative of RV `{}`.ravel()[{}]'
-                              ' is non-finite.'.format(*name_slc[ii]))
-            raise ValueError('\n'.join(errmsg))
+                errmsg.append(
+                    "The derivative of RV `{}`.ravel()[{}]"
+                    " is non-finite.".format(*name_slc[ii])
+                )
+            raise ValueError("\n".join(errmsg))
 
 
 class QuadPotentialDiagAdaptGrad(QuadPotentialDiagAdapt):
@@ -289,26 +314,32 @@ def update(self, sample, grad, tune):
 class _WeightedVariance(object):
     """Online algorithm for computing mean of variance."""
 
-    def __init__(self, nelem, initial_mean=None, initial_variance=None,
-                 initial_weight=0, dtype='d'):
+    def __init__(
+        self,
+        nelem,
+        initial_mean=None,
+        initial_variance=None,
+        initial_weight=0,
+        dtype="d",
+    ):
         self._dtype = dtype
         self.w_sum = float(initial_weight)
         self.w_sum2 = float(initial_weight) ** 2
         if initial_mean is None:
-            self.mean = np.zeros(nelem, dtype='d')
+            self.mean = np.zeros(nelem, dtype="d")
         else:
-            self.mean = np.array(initial_mean, dtype='d', copy=True)
+            self.mean = np.array(initial_mean, dtype="d", copy=True)
         if initial_variance is None:
-            self.raw_var = np.zeros(nelem, dtype='d')
+            self.raw_var = np.zeros(nelem, dtype="d")
         else:
-            self.raw_var = np.array(initial_variance, dtype='d', copy=True)
+            self.raw_var = np.array(initial_variance, dtype="d", copy=True)
 
         self.raw_var[:] *= self.w_sum
 
         if self.raw_var.shape != (nelem,):
-            raise ValueError('Invalid shape for initial variance.')
+            raise ValueError("Invalid shape for initial variance.")
         if self.mean.shape != (nelem,):
-            raise ValueError('Invalid shape for initial mean.')
+            raise ValueError("Invalid shape for initial mean.")
 
     def add_sample(self, x, weight):
         x = np.asarray(x)
@@ -322,7 +353,7 @@ def add_sample(self, x, weight):
 
     def current_variance(self, out=None):
         if self.w_sum == 0:
-            raise ValueError('Can not compute variance without samples.')
+            raise ValueError("Can not compute variance without samples.")
         if out is not None:
             return np.divide(self.raw_var, self.w_sum, out=out)
         else:
@@ -347,10 +378,10 @@ def __init__(self, v, dtype=None):
             dtype = theano.config.floatX
         self.dtype = dtype
         v = v.astype(self.dtype)
-        s = v ** .5
+        s = v ** 0.5
 
         self.s = s
-        self.inv_s = 1. / s
+        self.inv_s = 1.0 / s
         self.v = v
 
     def velocity(self, x, out=None):
@@ -368,7 +399,7 @@ def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
         if velocity is not None:
             return 0.5 * np.dot(x, velocity)
-        return .5 * x.dot(self.v * x)
+        return 0.5 * x.dot(self.v * x)
 
     def velocity_energy(self, x, v_out):
         """Compute velocity and return kinetic energy at a position in parameter space."""
@@ -408,7 +439,7 @@ def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
         if velocity is None:
             velocity = self.velocity(x)
-        return .5 * x.dot(velocity)
+        return 0.5 * x.dot(velocity)
 
     def velocity_energy(self, x, v_out):
         """Compute velocity and return kinetic energy at a position in parameter space."""
@@ -446,7 +477,7 @@ def energy(self, x, velocity=None):
         """Compute kinetic energy at a position in parameter space."""
         if velocity is None:
             velocity = self.velocity(x)
-        return .5 * x.dot(velocity)
+        return 0.5 * x.dot(velocity)
 
     def velocity_energy(self, x, v_out):
         """Compute velocity and return kinetic energy at a position in parameter space."""
@@ -458,12 +489,13 @@ def velocity_energy(self, x, v_out):
 
 try:
     import sksparse.cholmod as cholmod
+
     chol_available = True
 except ImportError:
     chol_available = False
 
 if chol_available:
-    __all__ += ['QuadPotentialSparse']
+    __all__ += ["QuadPotentialSparse"]
 
     import theano.sparse
 
diff --git a/pymc3/step_methods/hmc/trajectory.py b/pymc3/step_methods/hmc/trajectory.py
index 70efcb2259..865ac039e8 100644
--- a/pymc3/step_methods/hmc/trajectory.py
+++ b/pymc3/step_methods/hmc/trajectory.py
@@ -48,10 +48,12 @@ def _theano_energy_function(H, q, **theano_kwargs):
     energy_function : theano function that computes the energy at a point (p, q) in phase space
     p : Starting momentum variable.
     """
-    p = tt.vector('p')
+    p = tt.vector("p")
     p.tag.test_value = q.tag.test_value
     total_energy = H.pot.energy(p) - H.logp(q)
-    energy_function = theano.function(inputs=[q, p], outputs=total_energy, **theano_kwargs)
+    energy_function = theano.function(
+        inputs=[q, p], outputs=total_energy, **theano_kwargs
+    )
     energy_function.trust_input = True
 
     return energy_function, p
@@ -81,23 +83,31 @@ def _theano_leapfrog_integrator(H, q, p, **theano_kwargs):
     theano function which returns
     q_new, p_new, energy_new
     """
-    epsilon = tt.scalar('epsilon')
-    epsilon.tag.test_value = 1.
+    epsilon = tt.scalar("epsilon")
+    epsilon.tag.test_value = 1.0
 
-    n_steps = tt.iscalar('n_steps')
+    n_steps = tt.iscalar("n_steps")
     n_steps.tag.test_value = 2
 
     q_new, p_new = leapfrog(H, q, p, epsilon, n_steps)
     energy_new = energy(H, q_new, p_new)
 
-    f = theano.function([q, p, epsilon, n_steps], [q_new, p_new, energy_new], **theano_kwargs)
+    f = theano.function(
+        [q, p, epsilon, n_steps], [q_new, p_new, energy_new], **theano_kwargs
+    )
     f.trust_input = True
     return f
 
 
-def get_theano_hamiltonian_functions(model_vars, shared, logpt, potential,
-                                     use_single_leapfrog=False,
-                                     integrator="leapfrog", **theano_kwargs):
+def get_theano_hamiltonian_functions(
+    model_vars,
+    shared,
+    logpt,
+    potential,
+    use_single_leapfrog=False,
+    integrator="leapfrog",
+    **theano_kwargs
+):
     """Construct theano functions for the Hamiltonian, energy, and leapfrog integrator.
 
     Parameters
@@ -174,15 +184,19 @@ def leapfrog(H, q, p, epsilon, n_steps):
     momentum : Theano.tensor
         momentum estimate at time :math:`n \cdot e`.
     """
+
     def full_update(p, q):
         p = p + epsilon * H.dlogp(q)
         q += epsilon * H.pot.velocity(p)
         return p, q
+
     #  This first line can't be +=, possibly because of theano
     p = p + 0.5 * epsilon * H.dlogp(q)  # half momentum update
     q += epsilon * H.pot.velocity(p)  # full position update
     if tt.gt(n_steps, 1):
-        (p_seq, q_seq), _ = theano.scan(full_update, outputs_info=[p, q], n_steps=n_steps - 1)
+        (p_seq, q_seq), _ = theano.scan(
+            full_update, outputs_info=[p, q], n_steps=n_steps - 1
+        )
         p, q = p_seq[-1], q_seq[-1]
     p += 0.5 * epsilon * H.dlogp(q)  # half momentum update
     return q, p
@@ -203,8 +217,8 @@ def _theano_single_threestage(H, q, p, q_grad, **theano_kwargs):
     Hamiltonian Monte Carlo." arXiv:1608.07048 [Stat],
     August 25, 2016. http://arxiv.org/abs/1608.07048.
     """
-    epsilon = tt.scalar('epsilon')
-    epsilon.tag.test_value = 1.
+    epsilon = tt.scalar("epsilon")
+    epsilon.tag.test_value = 1.0
 
     a = 12127897.0 / 102017882
     b = 4271554.0 / 14421423
@@ -227,9 +241,11 @@ def _theano_single_threestage(H, q, p, q_grad, **theano_kwargs):
 
     new_energy = energy(H, q_e, p_e)
 
-    f = theano.function(inputs=[q, p, q_grad, epsilon],
-                        outputs=[q_e, p_e, v_e, grad_e, new_energy],
-                        **theano_kwargs)
+    f = theano.function(
+        inputs=[q, p, q_grad, epsilon],
+        outputs=[q_e, p_e, v_e, grad_e, new_energy],
+        **theano_kwargs
+    )
     f.trust_input = True
     return f
 
@@ -249,8 +265,8 @@ def _theano_single_twostage(H, q, p, q_grad, **theano_kwargs):
     Hamiltonian Monte Carlo." arXiv:1608.07048 [Stat],
     August 25, 2016. http://arxiv.org/abs/1608.07048.
     """
-    epsilon = tt.scalar('epsilon')
-    epsilon.tag.test_value = 1.
+    epsilon = tt.scalar("epsilon")
+    epsilon.tag.test_value = 1.0
 
     a = floatX((3 - np.sqrt(3)) / 6)
 
@@ -263,9 +279,11 @@ def _theano_single_twostage(H, q, p, q_grad, **theano_kwargs):
     v_e = H.pot.velocity(p_e)
 
     new_energy = energy(H, q_e, p_e)
-    f = theano.function(inputs=[q, p, q_grad, epsilon],
-                        outputs=[q_e, p_e, v_e, grad_e, new_energy],
-                        **theano_kwargs)
+    f = theano.function(
+        inputs=[q, p, q_grad, epsilon],
+        outputs=[q_e, p_e, v_e, grad_e, new_energy],
+        **theano_kwargs
+    )
     f.trust_input = True
     return f
 
@@ -276,8 +294,8 @@ def _theano_single_leapfrog(H, q, p, q_grad, **theano_kwargs):
     See above for documentation.  This is optimized for the case where only a single step is
     needed, in case of, for example, a recursive algorithm.
     """
-    epsilon = tt.scalar('epsilon')
-    epsilon.tag.test_value = 1.
+    epsilon = tt.scalar("epsilon")
+    epsilon.tag.test_value = 1.0
 
     p_new = p + 0.5 * epsilon * q_grad  # half momentum update
     q_new = q + epsilon * H.pot.velocity(p_new)  # full position update
@@ -286,15 +304,17 @@ def _theano_single_leapfrog(H, q, p, q_grad, **theano_kwargs):
     energy_new = energy(H, q_new, p_new)
     v_new = H.pot.velocity(p_new)
 
-    f = theano.function(inputs=[q, p, q_grad, epsilon],
-                        outputs=[q_new, p_new, v_new, q_new_grad, energy_new],
-                        **theano_kwargs)
+    f = theano.function(
+        inputs=[q, p, q_grad, epsilon],
+        outputs=[q_new, p_new, v_new, q_new_grad, energy_new],
+        **theano_kwargs
+    )
     f.trust_input = True
     return f
 
 
 INTEGRATORS_SINGLE = {
-    'leapfrog': _theano_single_leapfrog,
-    'two-stage': _theano_single_twostage,
-    'three-stage': _theano_single_threestage,
+    "leapfrog": _theano_single_leapfrog,
+    "two-stage": _theano_single_twostage,
+    "three-stage": _theano_single_threestage,
 }
diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py
index 1b0228bc73..2958ad9438 100644
--- a/pymc3/step_methods/metropolis.py
+++ b/pymc3/step_methods/metropolis.py
@@ -5,13 +5,28 @@
 import warnings
 
 from ..distributions import draw_values
-from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
+from .arraystep import (
+    ArrayStepShared,
+    PopulationArrayStepShared,
+    ArrayStep,
+    metrop_select,
+    Competence,
+)
 import pymc3 as pm
 from pymc3.theanof import floatX
 
-__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
-           'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
-           'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']
+__all__ = [
+    "Metropolis",
+    "DEMetropolis",
+    "BinaryMetropolis",
+    "BinaryGibbsMetropolis",
+    "CategoricalGibbsMetropolis",
+    "NormalProposal",
+    "CauchyProposal",
+    "LaplaceProposal",
+    "PoissonProposal",
+    "MultivariateNormalProposal",
+]
 
 # Available proposal distributions for Metropolis
 
@@ -39,7 +54,9 @@ def __call__(self):
 class LaplaceProposal(Proposal):
     def __call__(self):
         size = np.size(self.s)
-        return (nr.standard_exponential(size=size) - nr.standard_exponential(size=size)) * self.s
+        return (
+            nr.standard_exponential(size=size) - nr.standard_exponential(size=size)
+        ) * self.s
 
 
 class PoissonProposal(Proposal):
@@ -88,17 +105,25 @@ class Metropolis(ArrayStepShared):
     mode :  string or `Mode` instance.
         compilation mode passed to Theano functions
     """
-    name = 'metropolis'
+
+    name = "metropolis"
 
     default_blocked = False
     generates_stats = True
-    stats_dtypes = [{
-        'accept': np.float64,
-        'tune': np.bool,
-    }]
-
-    def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1.,
-                 tune=True, tune_interval=100, model=None, mode=None, **kwargs):
+    stats_dtypes = [{"accept": np.float64, "tune": np.bool}]
+
+    def __init__(
+        self,
+        vars=None,
+        S=None,
+        proposal_dist=None,
+        scaling=1.0,
+        tune=True,
+        tune_interval=100,
+        model=None,
+        mode=None,
+        **kwargs
+    ):
 
         model = pm.modelcontext(model)
 
@@ -118,7 +143,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1.,
         else:
             raise ValueError("Invalid rank for variance: %s" % S.ndim)
 
-        self.scaling = np.atleast_1d(scaling).astype('d')
+        self.scaling = np.atleast_1d(scaling).astype("d")
         self.tune = tune
         self.tune_interval = tune_interval
         self.steps_until_tune = tune_interval
@@ -126,7 +151,8 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1.,
 
         # Determine type of variables
         self.discrete = np.concatenate(
-            [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars])
+            [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars]
+        )
         self.any_discrete = self.discrete.any()
         self.all_discrete = self.discrete.all()
 
@@ -139,8 +165,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, scaling=1.,
     def astep(self, q0):
         if not self.steps_until_tune and self.tune:
             # Tune scaling parameter
-            self.scaling = tune(
-                self.scaling, self.accepted / float(self.tune_interval))
+            self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
             # Reset counter
             self.steps_until_tune = self.tune_interval
             self.accepted = 0
@@ -149,13 +174,12 @@ def astep(self, q0):
 
         if self.any_discrete:
             if self.all_discrete:
-                delta = np.round(delta, 0).astype('int64')
-                q0 = q0.astype('int64')
-                q = (q0 + delta).astype('int64')
+                delta = np.round(delta, 0).astype("int64")
+                q0 = q0.astype("int64")
+                q = (q0 + delta).astype("int64")
             else:
-                delta[self.discrete] = np.round(
-                    delta[self.discrete], 0)
-                q = (q0 + delta)
+                delta[self.discrete] = np.round(delta[self.discrete], 0)
+                q = q0 + delta
         else:
             q = floatX(q0 + delta)
 
@@ -165,10 +189,7 @@ def astep(self, q0):
 
         self.steps_until_tune -= 1
 
-        stats = {
-            'tune': self.tune,
-            'accept': np.exp(accept),
-        }
+        stats = {"tune": self.tune, "accept": np.exp(accept)}
 
         return q_new, [stats]
 
@@ -233,16 +254,13 @@ class BinaryMetropolis(ArrayStep):
         Optional model for sampling step. Defaults to None (taken from context).
 
     """
-    name = 'binary_metropolis'
+
+    name = "binary_metropolis"
 
     generates_stats = True
-    stats_dtypes = [{
-        'accept': np.float64,
-        'tune': np.bool,
-        'p_jump': np.float64,
-    }]
+    stats_dtypes = [{"accept": np.float64, "tune": np.bool, "p_jump": np.float64}]
 
-    def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None):
+    def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None):
 
         model = pm.modelcontext(model)
 
@@ -253,42 +271,36 @@ def __init__(self, vars, scaling=1., tune=True, tune_interval=100, model=None):
         self.accepted = 0
 
         if not all([v.dtype in pm.discrete_types for v in vars]):
-            raise ValueError(
-                'All variables must be Bernoulli for BinaryMetropolis')
+            raise ValueError("All variables must be Bernoulli for BinaryMetropolis")
 
         super(BinaryMetropolis, self).__init__(vars, [model.fastlogp])
 
     def astep(self, q0, logp):
 
         # Convert adaptive_scale_factor to a jump probability
-        p_jump = 1. - .5 ** self.scaling
+        p_jump = 1.0 - 0.5 ** self.scaling
 
         rand_array = nr.random(q0.shape)
         q = np.copy(q0)
         # Locations where switches occur, according to p_jump
-        switch_locs = (rand_array < p_jump)
+        switch_locs = rand_array < p_jump
         q[switch_locs] = True - q[switch_locs]
 
         accept = logp(q) - logp(q0)
         q_new, accepted = metrop_select(accept, q, q0)
         self.accepted += accepted
 
-        stats = {
-            'tune': self.tune,
-            'accept': np.exp(accept),
-            'p_jump': p_jump,
-        }
+        stats = {"tune": self.tune, "accept": np.exp(accept), "p_jump": p_jump}
 
         return q_new, [stats]
 
     @staticmethod
     def competence(var):
-        '''
+        """
         BinaryMetropolis is only suitable for binary (bool)
         and Categorical variables with k=1.
-        '''
-        distribution = getattr(
-            var.distribution, 'parent_dist', var.distribution)
+        """
+        distribution = getattr(var.distribution, "parent_dist", var.distribution)
         if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types):
             return Competence.COMPATIBLE
         elif isinstance(distribution, pm.Categorical) and (distribution.k == 2):
@@ -313,9 +325,10 @@ class BinaryGibbsMetropolis(ArrayStep):
         Optional model for sampling step. Defaults to None (taken from context).
 
     """
-    name = 'binary_gibbs_metropolis'
 
-    def __init__(self, vars, order='random', transit_p=.8, model=None):
+    name = "binary_gibbs_metropolis"
+
+    def __init__(self, vars, order="random", transit_p=0.8, model=None):
 
         model = pm.modelcontext(model)
 
@@ -324,18 +337,17 @@ def __init__(self, vars, order='random', transit_p=.8, model=None):
 
         self.dim = sum(v.dsize for v in vars)
 
-        if order == 'random':
+        if order == "random":
             self.shuffle_dims = True
             self.order = list(range(self.dim))
         else:
             if sorted(order) != list(range(self.dim)):
-                raise ValueError('Argument \'order\' has to be a permutation')
+                raise ValueError("Argument 'order' has to be a permutation")
             self.shuffle_dims = False
             self.order = order
 
         if not all([v.dtype in pm.discrete_types for v in vars]):
-            raise ValueError(
-                'All variables must be binary for BinaryGibbsMetropolis')
+            raise ValueError("All variables must be binary for BinaryGibbsMetropolis")
 
         super(BinaryGibbsMetropolis, self).__init__(vars, [model.fastlogp])
 
@@ -353,7 +365,9 @@ def astep(self, q0, logp):
             if nr.rand() < self.transit_p:
                 curr_val, q[idx] = q[idx], True - q[idx]
                 logp_prop = logp(q)
-                q[idx], accepted = metrop_select(logp_prop - logp_curr, q[idx], curr_val)
+                q[idx], accepted = metrop_select(
+                    logp_prop - logp_curr, q[idx], curr_val
+                )
                 if accepted:
                     logp_curr = logp_prop
 
@@ -361,12 +375,11 @@ def astep(self, q0, logp):
 
     @staticmethod
     def competence(var):
-        '''
+        """
         BinaryMetropolis is only suitable for Bernoulli
         and Categorical variables with k=2.
-        '''
-        distribution = getattr(
-            var.distribution, 'parent_dist', var.distribution)
+        """
+        distribution = getattr(var.distribution, "parent_dist", var.distribution)
         if isinstance(distribution, pm.Bernoulli) or (var.dtype in pm.bool_types):
             return Competence.IDEAL
         elif isinstance(distribution, pm.Categorical) and (distribution.k == 2):
@@ -382,9 +395,10 @@ class CategoricalGibbsMetropolis(ArrayStep):
        which was introduced by Liu in his 1996 technical report
        "Metropolized Gibbs Sampler: An Improvement".
     """
-    name = 'caregorical_gibbs_metropolis'
 
-    def __init__(self, vars, proposal='uniform', order='random', model=None):
+    name = "caregorical_gibbs_metropolis"
+
+    def __init__(self, vars, proposal="uniform", order="random", model=None):
 
         model = pm.modelcontext(model)
         vars = pm.inputvars(vars)
@@ -395,34 +409,37 @@ def __init__(self, vars, proposal='uniform', order='random', model=None):
         # variable with M categories and y being a 3-D variable with N
         # categories, we will have dimcats = [(0, M), (1, M), (2, N), (3, N), (4, N)].
         for v in vars:
-            distr = getattr(v.distribution, 'parent_dist', v.distribution)
+            distr = getattr(v.distribution, "parent_dist", v.distribution)
             if isinstance(distr, pm.Categorical):
                 k = draw_values([distr.k])[0]
             elif isinstance(distr, pm.Bernoulli) or (v.dtype in pm.bool_types):
                 k = 2
             else:
-                raise ValueError('All variables must be categorical or binary' +
-                                 'for CategoricalGibbsMetropolis')
+                raise ValueError(
+                    "All variables must be categorical or binary"
+                    + "for CategoricalGibbsMetropolis"
+                )
             start = len(dimcats)
             dimcats += [(dim, k) for dim in range(start, start + v.dsize)]
 
-        if order == 'random':
+        if order == "random":
             self.shuffle_dims = True
             self.dimcats = dimcats
         else:
             if sorted(order) != list(range(len(dimcats))):
-                raise ValueError('Argument \'order\' has to be a permutation')
+                raise ValueError("Argument 'order' has to be a permutation")
             self.shuffle_dims = False
             self.dimcats = [dimcats[j] for j in order]
 
-        if proposal == 'uniform':
+        if proposal == "uniform":
             self.astep = self.astep_unif
-        elif proposal == 'proportional':
+        elif proposal == "proportional":
             # Use the optimized "Metropolized Gibbs Sampler" described in Liu96.
             self.astep = self.astep_prop
         else:
-            raise ValueError('Argument \'proposal\' should either be ' +
-                    '\'uniform\' or \'proportional\'')
+            raise ValueError(
+                "Argument 'proposal' should either be " + "'uniform' or 'proportional'"
+            )
 
         super(CategoricalGibbsMetropolis, self).__init__(vars, [model.fastlogp])
 
@@ -466,8 +483,8 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k):
                 log_probs[candidate_cat] = logp(q)
         probs = softmax(log_probs)
         prob_curr, probs[given_cat] = probs[given_cat], 0.0
-        probs /= (1.0 - prob_curr)
-        proposed_cat = nr.choice(candidates, p = probs)
+        probs /= 1.0 - prob_curr
+        proposed_cat = nr.choice(candidates, p=probs)
         accept_ratio = (1.0 - prob_curr) / (1.0 - probs[proposed_cat])
         if not np.isfinite(accept_ratio) or nr.uniform() >= accept_ratio:
             q[dim] = given_cat
@@ -477,12 +494,11 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k):
 
     @staticmethod
     def competence(var):
-        '''
+        """
         CategoricalGibbsMetropolis is only suitable for Bernoulli and
         Categorical variables.
-        '''
-        distribution = getattr(
-            var.distribution, 'parent_dist', var.distribution)
+        """
+        distribution = getattr(var.distribution, "parent_dist", var.distribution)
         if isinstance(distribution, pm.Categorical):
             if distribution.k > 2:
                 return Competence.IDEAL
@@ -526,19 +542,30 @@ class DEMetropolis(PopulationArrayStepShared):
         Statistics and Computing
         `link <https://doi.org/10.1007/s11222-006-8769-1>`__
     """
-    name = 'DEMetropolis'
+
+    name = "DEMetropolis"
 
     default_blocked = True
     generates_stats = True
-    stats_dtypes = [{
-        'accept': np.float64,
-        'tune': np.bool,
-    }]
-
-    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
-                 tune=True, tune_interval=100, model=None, mode=None, **kwargs):
-        warnings.warn('Population based sampling methods such as DEMetropolis are experimental.' \
-            ' Use carefully and be extra critical about their results!')
+    stats_dtypes = [{"accept": np.float64, "tune": np.bool}]
+
+    def __init__(
+        self,
+        vars=None,
+        S=None,
+        proposal_dist=None,
+        lamb=None,
+        scaling=0.001,
+        tune=True,
+        tune_interval=100,
+        model=None,
+        mode=None,
+        **kwargs
+    ):
+        warnings.warn(
+            "Population based sampling methods such as DEMetropolis are experimental."
+            " Use carefully and be extra critical about their results!"
+        )
 
         model = pm.modelcontext(model)
 
@@ -554,7 +581,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0
         else:
             self.proposal_dist = UniformProposal(S)
 
-        self.scaling = np.atleast_1d(scaling).astype('d')
+        self.scaling = np.atleast_1d(scaling).astype("d")
         if lamb is None:
             lamb = 2.38 / np.sqrt(2 * S.size)
         self.lamb = float(lamb)
@@ -572,8 +599,7 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0
     def astep(self, q0):
         if not self.steps_until_tune and self.tune:
             # Tune scaling parameter
-            self.scaling = tune(
-                self.scaling, self.accepted / float(self.tune_interval))
+            self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
             # Reset counter
             self.steps_until_tune = self.tune_interval
             self.accepted = 0
@@ -594,10 +620,7 @@ def astep(self, q0):
 
         self.steps_until_tune -= 1
 
-        stats = {
-            'tune': self.tune,
-            'accept': np.exp(accept),
-        }
+        stats = {"tune": self.tune, "accept": np.exp(accept)}
 
         return q_new, [stats]
 
@@ -617,14 +640,14 @@ def sample_except(limit, excluded):
 
 def softmax(x):
     e_x = np.exp(x - np.max(x))
-    return e_x / np.sum(e_x, axis = 0)
+    return e_x / np.sum(e_x, axis=0)
 
 
 def delta_logp(logp, vars, shared):
     [logp0], inarray0 = pm.join_nonshared_inputs([logp], vars, shared)
 
     tensor_type = inarray0.type
-    inarray1 = tensor_type('inarray1')
+    inarray1 = tensor_type("inarray1")
 
     logp1 = pm.CallableTensor(logp0)(inarray1)
 
diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py
index 45d71e9098..5c0b437a05 100644
--- a/pymc3/step_methods/sgmcmc.py
+++ b/pymc3/step_methods/sgmcmc.py
@@ -9,10 +9,12 @@
 import theano
 import numpy as np
 
-__all__ = ['SGFS', 'CSG']
+__all__ = ["SGFS", "CSG"]
 
-EXPERIMENTAL_WARNING = "Warning: Stochastic Gradient based sampling methods are experimental step methods and not yet"\
+EXPERIMENTAL_WARNING = (
+    "Warning: Stochastic Gradient based sampling methods are experimental step methods and not yet"
     " recommended for use in PyMC3!"
+)
 
 
 def _value_error(cond, str):
@@ -23,17 +25,17 @@ def _value_error(cond, str):
 
 def _check_minibatches(minibatch_tensors, minibatches):
     _value_error(
-        isinstance(minibatch_tensors, list),
-        'minibatch_tensors must be a list.')
+        isinstance(minibatch_tensors, list), "minibatch_tensors must be a list."
+    )
 
-    _value_error(
-        hasattr(minibatches, "__iter__"), 'minibatches must be an iterator.')
+    _value_error(hasattr(minibatches, "__iter__"), "minibatches must be an iterator.")
 
 
 def prior_dlogp(vars, model, flat_view):
     """Returns the gradient of the prior on the parameters as a vector of size D x 1"""
     terms = tt.concatenate(
-        [theano.grad(var.logpt, var).flatten() for var in vars], axis=0)
+        [theano.grad(var.logpt, var).flatten() for var in vars], axis=0
+    )
     dlogp = theano.clone(terms, flat_view.replacements, strict=False)
 
     return dlogp
@@ -47,21 +49,23 @@ def elemwise_dlogL(vars, model, flat_view):
     # select one observed random variable
     obs_var = model.observed_RVs[0]
     # tensor of shape (batch_size,)
-    logL = obs_var.logp_elemwiset.sum(
-        axis=tuple(range(1, obs_var.logp_elemwiset.ndim)))
+    logL = obs_var.logp_elemwiset.sum(axis=tuple(range(1, obs_var.logp_elemwiset.ndim)))
     # calculate fisher information
     terms = []
     for var in vars:
-        output, _ =  theano.scan(lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(),\
-                           sequences=[tt.arange(logL.shape[0])])
+        output, _ = theano.scan(
+            lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(),
+            sequences=[tt.arange(logL.shape[0])],
+        )
         terms.append(output)
     dlogL = theano.clone(
-        tt.concatenate(terms, axis=1), flat_view.replacements, strict=False)
+        tt.concatenate(terms, axis=1), flat_view.replacements, strict=False
+    )
     return dlogL
 
 
 class BaseStochasticGradient(ArrayStepShared):
-    R"""
+    r"""
     BaseStochasticGradient Object
 
     For working with BaseStochasticGradient Object
@@ -99,16 +103,18 @@ class BaseStochasticGradient(ArrayStepShared):
             Returns None it creates class variables which are required for the training fn
     """
 
-    def __init__(self,
-                 vars=None,
-                 batch_size=None,
-                 total_size=None,
-                 step_size=1.0,
-                 model=None,
-                 random_seed=None,
-                 minibatches=None,
-                 minibatch_tensors=None,
-                 **kwargs):
+    def __init__(
+        self,
+        vars=None,
+        batch_size=None,
+        total_size=None,
+        step_size=1.0,
+        model=None,
+        random_seed=None,
+        minibatches=None,
+        minibatch_tensors=None,
+        **kwargs
+    ):
         warnings.warn(EXPERIMENTAL_WARNING)
 
         model = modelcontext(model)
@@ -124,7 +130,8 @@ def __init__(self,
         self.total_size = total_size
         _value_error(
             total_size != None or batch_size != None,
-            'total_size and batch_size of training data have to be specified')
+            "total_size and batch_size of training data have to be specified",
+        )
         self.expected_iter = int(total_size / batch_size)
 
         # set random stream
@@ -156,12 +163,10 @@ def __init__(self,
             def is_shared(t):
                 return isinstance(t, theano.compile.sharedvalue.SharedVariable)
 
-            tensors = [(t.type() if is_shared(t) else t)
-                       for t in minibatch_tensors]
-            updates = OrderedDict({
-                t: t_
-                for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)
-            })
+            tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
+            updates = OrderedDict(
+                {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)}
+            )
             self.minibatch_tensors = tensors
             self.inarray += self.minibatch_tensors
             self.updates.update(updates)
@@ -195,14 +200,14 @@ def astep(self, q0):
         -------
         q
         """
-        if hasattr(self, 'minibatch_tensors'):
+        if hasattr(self, "minibatch_tensors"):
             return q0 + self.training_fn(q0, *next(self.minibatches))
         else:
             return q0 + self.training_fn(q0)
 
 
 class SGFS(BaseStochasticGradient):
-    R"""
+    r"""
     StochasticGradientFisherScoring
 
     Parameters
@@ -220,7 +225,7 @@ class SGFS(BaseStochasticGradient):
     -   Bayesian Posterior Sampling via Stochastic Gradient Fisher Scoring
         Implements Algorithm 1 from the publication http://people.ee.duke.edu/%7Elcarin/782.pdf
     """
-    name = 'stochastic_gradient_fisher_scoring'
+    name = "stochastic_gradient_fisher_scoring"
 
     def __init__(self, vars=None, B=None, step_size_decay=100, **kwargs):
         """
@@ -238,9 +243,8 @@ def __init__(self, vars=None, B=None, step_size_decay=100, **kwargs):
 
     def _initialize_values(self):
         # Init avg_I
-        self.avg_I = theano.shared(
-            np.zeros((self.q_size, self.q_size)), name='avg_I')
-        self.t = theano.shared(1, name='t')
+        self.avg_I = theano.shared(np.zeros((self.q_size, self.q_size)), name="avg_I")
+        self.t = theano.shared(1, name="t")
         # 2. Set gamma
         self.gamma = (self.batch_size + self.total_size) / (self.total_size)
 
@@ -265,12 +269,12 @@ def mk_training_fn(self):
         avg_gt = gt.mean(axis=0)
 
         # 6. Calculate approximate Fisher Score
-        gt_diff = (gt - avg_gt)
+        gt_diff = gt - avg_gt
 
-        V = (1. / (n - 1)) * tt.dot(gt_diff.T, gt_diff)
+        V = (1.0 / (n - 1)) * tt.dot(gt_diff.T, gt_diff)
 
         # 7. Update moving average
-        I_t = (1. - 1. / t) * avg_I + (1. / t) * V
+        I_t = (1.0 - 1.0 / t) * avg_I + (1.0 / t) * V
 
         if B is None:
             # if B is not specified
@@ -287,23 +291,23 @@ def mk_training_fn(self):
         # where B_ch is cholesky decomposition of B
         # i.e. B = dot(B_ch, B_ch^T)
         B_ch = tt.slinalg.cholesky(B)
-        noise_term = tt.dot((2.*B_ch)/tt.sqrt(epsilon), \
-                random.normal((q_size,), dtype=theano.config.floatX))
+        noise_term = tt.dot(
+            (2.0 * B_ch) / tt.sqrt(epsilon),
+            random.normal((q_size,), dtype=theano.config.floatX),
+        )
         # 9.
         # Inv. Fisher Cov. Matrix
-        cov_mat = (gamma * I_t * N) + ((4. / epsilon) * B)
+        cov_mat = (gamma * I_t * N) + ((4.0 / epsilon) * B)
         inv_cov_mat = tt.nlinalg.matrix_inverse(cov_mat)
         # Noise Coefficient
-        noise_coeff = (dlog_prior + (N * avg_gt) + noise_term)
+        noise_coeff = dlog_prior + (N * avg_gt) + noise_term
         dq = 2 * tt.dot(inv_cov_mat, noise_coeff)
 
         updates.update({avg_I: I_t, t: t + 1})
 
         f = theano.function(
-            outputs=dq,
-            inputs=inarray,
-            updates=updates,
-            allow_input_downcast=True)
+            outputs=dq, inputs=inarray, updates=updates, allow_input_downcast=True
+        )
 
         return f
 
@@ -315,7 +319,7 @@ def competence(var, has_grad):
 
 
 class CSG(BaseStochasticGradient):
-    R"""
+    r"""
     CSG: ConstantStochasticGradient
     
     It is an approximate stochastic variational inference algorithm
@@ -337,7 +341,7 @@ class CSG(BaseStochasticGradient):
     -   Stochastic Gradient Descent as Approximate Bayesian Inference
         https://arxiv.org/pdf/1704.04289v1.pdf
     """
-    name = 'constant_stochastic_gradient'
+    name = "constant_stochastic_gradient"
 
     def __init__(self, vars=None, **kwargs):
         """
@@ -351,9 +355,8 @@ def __init__(self, vars=None, **kwargs):
 
     def _initialize_values(self):
         # Init avg_C: Noise Covariance Moving Average
-        self.avg_C = theano.shared(
-            np.zeros((self.q_size, self.q_size)), name='avg_C')
-        self.t = theano.shared(1, name='t')
+        self.avg_C = theano.shared(np.zeros((self.q_size, self.q_size)), name="avg_C")
+        self.t = theano.shared(1, name="t")
         # Init training fn
         self.training_fn = self.mk_training_fn()
 
@@ -373,36 +376,36 @@ def mk_training_fn(self):
         inarray = self.inarray
 
         # gradient of log likelihood
-        gt = -1 * (1. / S) * (self.dlogp_elemwise.sum(axis=0) +
-                              (S / N) * self.dlog_prior)
+        gt = (
+            -1
+            * (1.0 / S)
+            * (self.dlogp_elemwise.sum(axis=0) + (S / N) * self.dlog_prior)
+        )
 
         # update moving average of Noise Covariance
-        gt_diff = (self.dlogp_elemwise - self.dlogp_elemwise.mean(axis=0))
-        V = (1. / (S - 1)) * theano.dot(gt_diff.T, gt_diff)
-        C_t = (1. - 1. / t) * avg_C + (1. / t) * V
-        # BB^T = C 
+        gt_diff = self.dlogp_elemwise - self.dlogp_elemwise.mean(axis=0)
+        V = (1.0 / (S - 1)) * theano.dot(gt_diff.T, gt_diff)
+        C_t = (1.0 - 1.0 / t) * avg_C + (1.0 / t) * V
+        # BB^T = C
         B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))
         # Optimal Preconditioning Matrix
-        H = (2. * S / N) * tt.nlinalg.matrix_inverse(C_t)
+        H = (2.0 * S / N) * tt.nlinalg.matrix_inverse(C_t)
         # step value on the log likelihood gradient preconditioned with H
-        step = -1 * theano.dot(H, gt.dimshuffle([0, 'x']))
+        step = -1 * theano.dot(H, gt.dimshuffle([0, "x"]))
 
         # sample gaussian noise dW
-        dW = random.normal(
-            (q_size, 1), dtype=theano.config.floatX, avg=0.0, std=1.0)
+        dW = random.normal((q_size, 1), dtype=theano.config.floatX, avg=0.0, std=1.0)
         # noise term is inversely proportional to batch size
-        noise_term = (1. / np.sqrt(S)) * theano.dot(H, theano.dot(B, dW))
+        noise_term = (1.0 / np.sqrt(S)) * theano.dot(H, theano.dot(B, dW))
         # step + noise term
         dq = (step + noise_term).flatten()
 
-        # update time and avg_C 
+        # update time and avg_C
         updates.update({avg_C: C_t, t: t + 1})
 
         f = theano.function(
-            outputs=dq,
-            inputs=inarray,
-            updates=updates,
-            allow_input_downcast=True)
+            outputs=dq, inputs=inarray, updates=updates, allow_input_downcast=True
+        )
 
         return f
 
diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py
index a68b18fe05..0267d8e9c7 100644
--- a/pymc3/step_methods/slicer.py
+++ b/pymc3/step_methods/slicer.py
@@ -8,9 +8,9 @@
 from ..theanof import inputvars
 from ..vartypes import continuous_types
 
-__all__ = ['Slice']
+__all__ = ["Slice"]
 
-LOOP_ERR_MSG = 'max slicer iters %d exceeded'
+LOOP_ERR_MSG = "max slicer iters %d exceeded"
 
 
 class Slice(ArrayStep):
@@ -29,15 +29,17 @@ class Slice(ArrayStep):
         Optional model for sampling step. Defaults to None (taken from context).
 
     """
-    name = 'slice'
+
+    name = "slice"
     default_blocked = False
 
-    def __init__(self, vars=None, w=1., tune=True, model=None,
-                 iter_limit=np.inf, **kwargs):
+    def __init__(
+        self, vars=None, w=1.0, tune=True, model=None, iter_limit=np.inf, **kwargs
+    ):
         self.model = modelcontext(model)
         self.w = w
         self.tune = tune
-        self.n_tunes = 0.
+        self.n_tunes = 0.0
         self.iter_limit = iter_limit
 
         if vars is None:
@@ -58,13 +60,13 @@ def astep(self, q0, logp):
             qr[i] = q[i] + self.w[i]
             # Stepping out procedure
             cnt = 0
-            while(y <= logp(ql)):  # changed lt to leq  for locally uniform posteriors
+            while y <= logp(ql):  # changed lt to leq  for locally uniform posteriors
                 ql[i] -= self.w[i]
                 cnt += 1
                 if cnt > self.iter_limit:
                     raise RuntimeError(LOOP_ERR_MSG % self.iter_limit)
             cnt = 0
-            while(y <= logp(qr)):
+            while y <= logp(qr):
                 qr[i] += self.w[i]
                 cnt += 1
                 if cnt > self.iter_limit:
@@ -72,7 +74,9 @@ def astep(self, q0, logp):
 
             cnt = 0
             q[i] = nr.uniform(ql[i], qr[i])
-            while logp(q) < y:  # Changed leq to lt, to accomodate for locally flat posteriors
+            while (
+                logp(q) < y
+            ):  # Changed leq to lt, to accomodate for locally flat posteriors
                 # Sample uniformly from slice
                 if q[i] > q0[i]:
                     qr[i] = q[i]
@@ -83,11 +87,16 @@ def astep(self, q0, logp):
                 if cnt > self.iter_limit:
                     raise RuntimeError(LOOP_ERR_MSG % self.iter_limit)
 
-            if self.tune:  # I was under impression from MacKays lectures that slice width can be tuned without
+            if (
+                self.tune
+            ):  # I was under impression from MacKays lectures that slice width can be tuned without
                 # breaking markovianness. Can we do it regardless of self.tune?(@madanh)
-                self.w[i] = self.w[i] * (self.n_tunes / (self.n_tunes + 1)) +\
-                    (qr[i] - ql[i]) / (self.n_tunes + 1)  # same as before
-            # unobvious and important: return qr and ql to the same point
+                self.w[i] = self.w[i] * (self.n_tunes / (self.n_tunes + 1)) + (
+                    qr[i] - ql[i]
+                ) / (
+                    self.n_tunes + 1
+                )  # same as before
+                # unobvious and important: return qr and ql to the same point
                 qr[i] = q[i]
                 ql[i] = q[i]
         if self.tune:
@@ -101,4 +110,3 @@ def competence(var, has_grad):
                 return Competence.PREFERRED
             return Competence.COMPATIBLE
         return Competence.INCOMPATIBLE
-       
\ No newline at end of file
diff --git a/pymc3/step_methods/smc.py b/pymc3/step_methods/smc.py
index 548bf605cb..bf3a560dca 100644
--- a/pymc3/step_methods/smc.py
+++ b/pymc3/step_methods/smc.py
@@ -15,12 +15,12 @@
 from ..backends.base import MultiTrace
 
 
-__all__ = ['SMC', 'sample_smc']
+__all__ = ["SMC", "sample_smc"]
 
-proposal_dists = {'MultivariateNormal': MultivariateNormalProposal}
+proposal_dists = {"MultivariateNormal": MultivariateNormalProposal}
 
 
-class SMC():
+class SMC:
     """
     Sequential Monte Carlo step
 
@@ -59,8 +59,15 @@ class SMC():
         %282007%29133:7%28816%29>`__
     """
 
-    def __init__(self, n_steps=5, scaling=1., p_acc_rate=0.01, tune=True,
-                 proposal_name='MultivariateNormal', threshold=0.5):
+    def __init__(
+        self,
+        n_steps=5,
+        scaling=1.0,
+        p_acc_rate=0.01,
+        tune=True,
+        proposal_name="MultivariateNormal",
+        threshold=0.5,
+    ):
 
         self.n_steps = n_steps
         self.scaling = scaling
@@ -88,7 +95,9 @@ def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed
     random_seed : int
         random seed
     """
-    warnings.warn("Warning: SMC is experimental, hopefully it will be ready for PyMC 3.6")
+    warnings.warn(
+        "Warning: SMC is experimental, hopefully it will be ready for PyMC 3.6"
+    )
     model = modelcontext(model)
 
     if random_seed != -1:
@@ -99,19 +108,23 @@ def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed
     acc_rate = 1
     model.marginal_likelihood = 1
     variables = model.vars
-    discrete = np.concatenate([[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in variables])
+    discrete = np.concatenate(
+        [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in variables]
+    )
     any_discrete = discrete.any()
     all_discrete = discrete.all()
     shared = make_shared_replacements(variables, model)
     prior_logp = logp_forw([model.varlogpt], variables, shared)
     likelihood_logp = logp_forw([model.datalogpt], variables, shared)
 
-    pm._log.info('Sample initial stage: ...')
+    pm._log.info("Sample initial stage: ...")
     posterior, var_info = _initial_population(draws, model, variables)
 
     while beta < 1:
         # compute plausibility weights (measure fitness)
-        likelihoods = np.array([likelihood_logp(sample) for sample in posterior]).squeeze()
+        likelihoods = np.array(
+            [likelihood_logp(sample) for sample in posterior]
+        ).squeeze()
         beta, old_beta, weights, sj = _calc_beta(beta, likelihoods, step.threshold)
         model.marginal_likelihood *= sj
         # resample based on plausibility weights (selection)
@@ -127,15 +140,18 @@ def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed
         # acceptance rate
         if step.tune and stage > 0:
             if acc_rate == 0:
-                acc_rate = 1. / step.n_steps
+                acc_rate = 1.0 / step.n_steps
             step.scaling = _tune(acc_rate)
             step.n_steps = 1 + int(np.log(step.p_acc_rate) / np.log(1 - acc_rate))
 
-        pm._log.info('Stage: {:d} Beta: {:f} Steps: {:d} Acc: {:f}'.format(stage, beta,
-                                                                           step.n_steps, acc_rate))
+        pm._log.info(
+            "Stage: {:d} Beta: {:f} Steps: {:d} Acc: {:f}".format(
+                stage, beta, step.n_steps, acc_rate
+            )
+        )
         # Apply Metropolis kernel (mutation)
-        proposed = 0.
-        accepted = 0.
+        proposed = 0.0
+        accepted = 0.0
         priors = np.array([prior_logp(sample) for sample in posterior]).squeeze()
         tempered_post = priors + likelihoods * beta
         for draw in tqdm(range(draws), disable=not progressbar):
@@ -147,23 +163,25 @@ def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed
 
                 if any_discrete:
                     if all_discrete:
-                        delta = np.round(delta, 0).astype('int64')
-                        q_old = q_old.astype('int64')
-                        q_new = (q_old + delta).astype('int64')
+                        delta = np.round(delta, 0).astype("int64")
+                        q_old = q_old.astype("int64")
+                        q_new = (q_old + delta).astype("int64")
                     else:
                         delta[discrete] = np.round(delta[discrete], 0)
-                        q_new = (q_old + delta)
+                        q_new = q_old + delta
                 else:
                     q_new = floatX(q_old + delta)
 
                 new_tempered_post = prior_logp(q_new) + likelihood_logp(q_new)[0] * beta
 
-                q_old, accept = metrop_select(new_tempered_post - old_tempered_post, q_new, q_old)
+                q_old, accept = metrop_select(
+                    new_tempered_post - old_tempered_post, q_new, q_old
+                )
                 if accept:
                     accepted += accept
                     posterior[draw] = q_old
                     old_tempered_post = new_tempered_post
-                proposed += 1.
+                proposed += 1.0
 
         acc_rate = accepted / proposed
         stage += 1
@@ -219,11 +237,11 @@ def _calc_beta(beta, likelihoods, threshold=0.5):
         Partial marginal likelihood
     """
     low_beta = old_beta = beta
-    up_beta = 2.
+    up_beta = 2.0
     rN = int(len(likelihoods) * threshold)
 
     while up_beta - low_beta > 1e-6:
-        new_beta = (low_beta + up_beta) / 2.
+        new_beta = (low_beta + up_beta) / 2.0
         weights_un = np.exp((new_beta - old_beta) * (likelihoods - likelihoods.max()))
         weights = weights_un / np.sum(weights_un)
         ESS = int(1 / np.sum(weights ** 2))
@@ -245,7 +263,9 @@ def _calc_covariance(posterior_array, weights):
     """
     Calculate trace covariance matrix based on importance weights.
     """
-    cov = np.cov(np.squeeze(posterior_array), aweights=weights.ravel(), bias=False, rowvar=0)
+    cov = np.cov(
+        np.squeeze(posterior_array), aweights=weights.ravel(), bias=False, rowvar=0
+    )
     if np.isnan(cov).any() or np.isinf(cov).any():
         raise ValueError('Sample covariances not valid! Likely "chains" is too small!')
     return np.atleast_2d(cov)
@@ -265,8 +285,8 @@ def _tune(acc_rate):
     scaling: float
     """
     # a and b after Muto & Beck 2008 .
-    a = 1. / 9
-    b = 8. / 9
+    a = 1.0 / 9
+    b = 8.0 / 9
     return (a + b * acc_rate) ** 2
 
 
@@ -285,7 +305,7 @@ def _posterior_to_trace(posterior, model, var_info):
         size = 0
         for var in varnames:
             shape, new_size = var_info[var]
-            value.append(posterior[i][size:size+new_size].reshape(shape))
+            value.append(posterior[i][size : size + new_size].reshape(shape))
             size += new_size
         strace.record({k: v for k, v in zip(varnames, value)})
     return MultiTrace([strace])
diff --git a/pymc3/step_methods/step_sizes.py b/pymc3/step_methods/step_sizes.py
index 6cc3de5a87..502f46e160 100644
--- a/pymc3/step_methods/step_sizes.py
+++ b/pymc3/step_methods/step_sizes.py
@@ -9,7 +9,7 @@ def __init__(self, initial_step, target, gamma, k, t0):
         self._log_step = np.log(initial_step)
         self._log_bar = self._log_step
         self._target = target
-        self._hbar = 0.
+        self._hbar = 0.0
         self._k = k
         self._t0 = t0
         self._count = 1
@@ -29,8 +29,8 @@ def update(self, accept_stat, tune):
             return
 
         count, k, t0 = self._count, self._k, self._t0
-        w = 1. / (count + t0)
-        self._hbar = ((1 - w) * self._hbar + w * (self._target - accept_stat))
+        w = 1.0 / (count + t0)
+        self._hbar = (1 - w) * self._hbar + w * (self._target - accept_stat)
 
         self._log_step = self._mu - self._hbar * np.sqrt(count) / self._gamma
         mk = count ** -k
@@ -39,8 +39,8 @@ def update(self, accept_stat, tune):
 
     def stats(self):
         return {
-            'step_size': np.exp(self._log_step),
-            'step_size_bar': np.exp(self._log_bar),
+            "step_size": np.exp(self._log_step),
+            "step_size_bar": np.exp(self._log_bar),
         }
 
     def warnings(self):
@@ -53,13 +53,15 @@ def warnings(self):
         n_good, n_bad = mean_accept * n_bound, (1 - mean_accept) * n_bound
         lower, upper = stats.beta(n_good + 1, n_bad + 1).interval(0.95)
         if target_accept < lower or target_accept > upper:
-            msg = ('The acceptance probability does not match the target. It '
-                   'is %s, but should be close to %s. Try to increase the '
-                   'number of tuning steps.'
-                   % (mean_accept, target_accept))
-            info = {'target': target_accept, 'actual': mean_accept}
+            msg = (
+                "The acceptance probability does not match the target. It "
+                "is %s, but should be close to %s. Try to increase the "
+                "number of tuning steps." % (mean_accept, target_accept)
+            )
+            info = {"target": target_accept, "actual": mean_accept}
             warning = SamplerWarning(
-                WarningType.BAD_ACCEPTANCE, msg, 'warn', None, None, info)
+                WarningType.BAD_ACCEPTANCE, msg, "warn", None, None, info
+            )
             return [warning]
         else:
             return []
diff --git a/pymc3/tests/backend_fixtures.py b/pymc3/tests/backend_fixtures.py
index 6da3d33539..87e4137650 100644
--- a/pymc3/tests/backend_fixtures.py
+++ b/pymc3/tests/backend_fixtures.py
@@ -33,7 +33,7 @@ def setup_method(self):
         with self.model:
             self.strace = self.backend(self.name)
         self.draws, self.chain = 3, 0
-        if not hasattr(self, 'sampler_vars'):
+        if not hasattr(self, "sampler_vars"):
             self.sampler_vars = None
         if self.sampler_vars is not None:
             assert self.strace.supports_sampler_stats
@@ -46,11 +46,11 @@ def test_append_invalid(self):
             with pytest.raises(ValueError):
                 self.strace.setup(self.draws, self.chain)
             with pytest.raises(ValueError):
-                vars = self.sampler_vars + [{'a': np.bool}]
+                vars = self.sampler_vars + [{"a": np.bool}]
                 self.strace.setup(self.draws, self.chain, vars)
         else:
             with pytest.raises((ValueError, TypeError)):
-                self.strace.setup(self.draws, self.chain, [{'a': np.bool}])
+                self.strace.setup(self.draws, self.chain, [{"a": np.bool}])
 
     def test_append(self):
         if self.sampler_vars is None:
@@ -82,20 +82,21 @@ class StatsTestCase(object):
     - name
     - shape
     """
+
     def setup_method(self):
         self.test_point, self.model, _ = models.beta_bernoulli(self.shape)
         self.draws, self.chain = 3, 0
 
     def test_bad_dtype(self):
-        bad_vars = [{'a': np.float64}, {'a': np.bool}]
-        good_vars = [{'a': np.float64}, {'a': np.float64}]
+        bad_vars = [{"a": np.float64}, {"a": np.bool}]
+        good_vars = [{"a": np.float64}, {"a": np.float64}]
         with self.model:
             strace = self.backend(self.name)
         with pytest.raises((ValueError, TypeError)):
             strace.setup(self.draws, self.chain, bad_vars)
         strace.setup(self.draws, self.chain, good_vars)
         if strace.supports_sampler_stats:
-            assert strace.stat_names == set(['a'])
+            assert strace.stat_names == set(["a"])
         else:
             with pytest.raises((ValueError, TypeError)):
                 strace.setup(self.draws, self.chain, good_vars)
@@ -125,6 +126,7 @@ class ModelBackendSampledTestCase(object):
     Children may define
     - sampler_vars
     """
+
     @classmethod
     def setup_class(cls):
         cls.test_point, cls.model, _ = models.beta_bernoulli(cls.shape)
@@ -132,7 +134,7 @@ def setup_class(cls):
             strace0 = cls.backend(cls.name)
             strace1 = cls.backend(cls.name)
 
-        if not hasattr(cls, 'sampler_vars'):
+        if not hasattr(cls, "sampler_vars"):
             cls.sampler_vars = None
 
         cls.draws = 5
@@ -144,16 +146,15 @@ def setup_class(cls):
             strace1.setup(cls.draws, chain=1)
 
         varnames = list(cls.test_point.keys())
-        shapes = {varname: value.shape
-                  for varname, value in cls.test_point.items()}
-        dtypes = {varname: value.dtype
-                  for varname, value in cls.test_point.items()}
+        shapes = {varname: value.shape for varname, value in cls.test_point.items()}
+        dtypes = {varname: value.dtype for varname, value in cls.test_point.items()}
 
         cls.expected = {0: {}, 1: {}}
         for varname in varnames:
             mcmc_shape = (cls.draws,) + shapes[varname]
-            values = np.arange(cls.draws * np.prod(shapes[varname]),
-                               dtype=dtypes[varname])
+            values = np.arange(
+                cls.draws * np.prod(shapes[varname]), dtype=dtypes[varname]
+            )
             cls.expected[0][varname] = values.reshape(mcmc_shape)
             cls.expected[1][varname] = values.reshape(mcmc_shape) * 100
 
@@ -169,17 +170,22 @@ def setup_class(cls):
                     else:
                         stats[key] = np.arange(cls.draws, dtype=dtype)
 
-
         for idx in range(cls.draws):
-            point0 = {varname: cls.expected[0][varname][idx, ...]
-                      for varname in varnames}
-            point1 = {varname: cls.expected[1][varname][idx, ...]
-                      for varname in varnames}
+            point0 = {
+                varname: cls.expected[0][varname][idx, ...] for varname in varnames
+            }
+            point1 = {
+                varname: cls.expected[1][varname][idx, ...] for varname in varnames
+            }
             if cls.sampler_vars is not None:
-                stats1 = [dict((key, val[idx]) for key, val in stats.items())
-                          for stats in cls.expected_stats[0]]
-                stats2 = [dict((key, val[idx]) for key, val in stats.items())
-                          for stats in cls.expected_stats[1]]
+                stats1 = [
+                    dict((key, val[idx]) for key, val in stats.items())
+                    for stats in cls.expected_stats[0]
+                ]
+                stats2 = [
+                    dict((key, val[idx]) for key, val in stats.items())
+                    for stats in cls.expected_stats[1]
+                ]
                 strace0.record(point=point0, sampler_stats=stats1)
                 strace1.record(point=point1, sampler_stats=stats2)
             else:
@@ -223,27 +229,37 @@ class SamplingTestCase(ModelBackendSetupTestCase):
     """
 
     def record_point(self, val):
-        point = {varname: np.tile(val, value.shape)
-                 for varname, value in self.test_point.items()}
+        point = {
+            varname: np.tile(val, value.shape)
+            for varname, value in self.test_point.items()
+        }
         if self.sampler_vars is not None:
-            stats = [dict((key, dtype(val)) for key, dtype in vars.items())
-                     for vars in self.sampler_vars]
+            stats = [
+                dict((key, dtype(val)) for key, dtype in vars.items())
+                for vars in self.sampler_vars
+            ]
             self.strace.record(point=point, sampler_stats=stats)
         else:
             self.strace.record(point=point)
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_standard_close(self):
         for idx in range(self.draws):
             self.record_point(idx)
         self.strace.close()
 
         for varname in self.test_point.keys():
-            npt.assert_equal(self.strace.get_values(varname)[0, ...],
-                             np.zeros(self.strace.var_shapes[varname]))
+            npt.assert_equal(
+                self.strace.get_values(varname)[0, ...],
+                np.zeros(self.strace.var_shapes[varname]),
+            )
             last_idx = self.draws - 1
-            npt.assert_equal(self.strace.get_values(varname)[last_idx, ...],
-                             np.tile(last_idx, self.strace.var_shapes[varname]))
+            npt.assert_equal(
+                self.strace.get_values(varname)[last_idx, ...],
+                np.tile(last_idx, self.strace.var_shapes[varname]),
+            )
         if self.sampler_vars:
             for varname in self.strace.stat_names:
                 vals = self.strace.get_sampler_stats(varname)
@@ -272,41 +288,56 @@ class SelectionTestCase(ModelBackendSampledTestCase):
     - shape
     """
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_get_values_default(self):
         for varname in self.test_point.keys():
-            expected = np.concatenate([self.expected[chain][varname]
-                                       for chain in [0, 1]])
+            expected = np.concatenate(
+                [self.expected[chain][varname] for chain in [0, 1]]
+            )
             result = self.mtrace.get_values(varname)
             npt.assert_equal(result, expected)
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_get_values_nocombine_burn_keyword(self):
         burn = 2
         for varname in self.test_point.keys():
-            expected = [self.expected[0][varname][burn:],
-                        self.expected[1][varname][burn:]]
+            expected = [
+                self.expected[0][varname][burn:],
+                self.expected[1][varname][burn:],
+            ]
             result = self.mtrace.get_values(varname, burn=burn, combine=False)
             npt.assert_equal(result, expected)
 
     def test_len(self):
         assert len(self.mtrace) == self.draws
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_dtypes(self):
         for varname in self.test_point.keys():
-            assert self.expected[0][varname].dtype == \
-                             self.mtrace.get_values(varname, chains=0).dtype
+            assert (
+                self.expected[0][varname].dtype
+                == self.mtrace.get_values(varname, chains=0).dtype
+            )
 
         for statname in self.mtrace.stat_names:
-            assert self.stat_dtypes[statname] == \
-                             self.mtrace.get_sampler_stats(statname, chains=0).dtype
+            assert (
+                self.stat_dtypes[statname]
+                == self.mtrace.get_sampler_stats(statname, chains=0).dtype
+            )
 
     def test_get_values_nocombine_thin_keyword(self):
         thin = 2
         for varname in self.test_point.keys():
-            expected = [self.expected[0][varname][::thin],
-                        self.expected[1][varname][::thin]]
+            expected = [
+                self.expected[0][varname][::thin],
+                self.expected[1][varname][::thin],
+            ]
             result = self.mtrace.get_values(varname, thin=thin, combine=False)
             npt.assert_equal(result, expected)
 
@@ -320,13 +351,18 @@ def test_get_point(self):
     def test_get_slice(self):
         expected = []
         for chain in [0, 1]:
-            expected.append({varname: self.expected[chain][varname][2:]
-                             for varname in self.mtrace.varnames})
+            expected.append(
+                {
+                    varname: self.expected[chain][varname][2:]
+                    for varname in self.mtrace.varnames
+                }
+            )
         result = self.mtrace[2:]
         for chain in [0, 1]:
             for varname in self.test_point.keys():
-                npt.assert_equal(result.get_values(varname, chains=[chain]),
-                                 expected[chain][varname])
+                npt.assert_equal(
+                    result.get_values(varname, chains=[chain]), expected[chain][varname]
+                )
 
     def test_get_slice_step(self):
         result = self.mtrace[:]
@@ -335,9 +371,8 @@ def test_get_slice_step(self):
         result = self.mtrace[::2]
         assert len(result) == self.draws // 2
 
-
     def test_get_slice_neg_step(self):
-        if hasattr(self, 'skip_test_get_slice_neg_step'):
+        if hasattr(self, "skip_test_get_slice_neg_step"):
             return
 
         result = self.mtrace[::-1]
@@ -346,17 +381,21 @@ def test_get_slice_neg_step(self):
         result = self.mtrace[::-2]
         assert len(result) == self.draws // 2
 
-
     def test_get_neg_slice(self):
         expected = []
         for chain in [0, 1]:
-            expected.append({varname: self.expected[chain][varname][-2:]
-                             for varname in self.mtrace.varnames})
+            expected.append(
+                {
+                    varname: self.expected[chain][varname][-2:]
+                    for varname in self.mtrace.varnames
+                }
+            )
         result = self.mtrace[-2:]
         for chain in [0, 1]:
             for varname in self.test_point.keys():
-                npt.assert_equal(result.get_values(varname, chains=[chain]),
-                                 expected[chain][varname])
+                npt.assert_equal(
+                    result.get_values(varname, chains=[chain]), expected[chain][varname]
+                )
 
     def test_get_values_one_chain(self):
         for varname in self.test_point.keys():
@@ -367,8 +406,7 @@ def test_get_values_one_chain(self):
     def test_get_values_nocombine_chains_reversed(self):
         for varname in self.test_point.keys():
             expected = [self.expected[1][varname], self.expected[0][varname]]
-            result = self.mtrace.get_values(varname, chains=[1, 0],
-                                            combine=False)
+            result = self.mtrace.get_values(varname, chains=[1, 0], combine=False)
             npt.assert_equal(result, expected)
 
     def test_nchains(self):
@@ -376,51 +414,54 @@ def test_nchains(self):
 
     def test_get_values_one_chain_int_arg(self):
         for varname in self.test_point.keys():
-            npt.assert_equal(self.mtrace.get_values(varname, chains=[0]),
-                             self.mtrace.get_values(varname, chains=0))
+            npt.assert_equal(
+                self.mtrace.get_values(varname, chains=[0]),
+                self.mtrace.get_values(varname, chains=0),
+            )
 
     def test_get_values_combine(self):
         for varname in self.test_point.keys():
-            expected = np.concatenate([self.expected[chain][varname]
-                                       for chain in [0, 1]])
+            expected = np.concatenate(
+                [self.expected[chain][varname] for chain in [0, 1]]
+            )
             result = self.mtrace.get_values(varname, combine=True)
             npt.assert_equal(result, expected)
 
     def test_get_values_combine_burn_arg(self):
         burn = 2
         for varname in self.test_point.keys():
-            expected = np.concatenate([self.expected[chain][varname][burn:]
-                                       for chain in [0, 1]])
+            expected = np.concatenate(
+                [self.expected[chain][varname][burn:] for chain in [0, 1]]
+            )
             result = self.mtrace.get_values(varname, combine=True, burn=burn)
             npt.assert_equal(result, expected)
 
     def test_get_values_combine_thin_arg(self):
         thin = 2
         for varname in self.test_point.keys():
-            expected = np.concatenate([self.expected[chain][varname][::thin]
-                                       for chain in [0, 1]])
+            expected = np.concatenate(
+                [self.expected[chain][varname][::thin] for chain in [0, 1]]
+            )
             result = self.mtrace.get_values(varname, combine=True, thin=thin)
             npt.assert_equal(result, expected)
 
     def test_getitem_equivalence(self):
         mtrace = self.mtrace
         for varname in self.test_point.keys():
-            npt.assert_equal(mtrace[varname],
-                             mtrace.get_values(varname, combine=True))
-            npt.assert_equal(mtrace[varname, 2:],
-                             mtrace.get_values(varname, burn=2,
-                                               combine=True))
-            npt.assert_equal(mtrace[varname, 2::2],
-                             mtrace.get_values(varname, burn=2, thin=2,
-                                               combine=True))
+            npt.assert_equal(mtrace[varname], mtrace.get_values(varname, combine=True))
+            npt.assert_equal(
+                mtrace[varname, 2:], mtrace.get_values(varname, burn=2, combine=True)
+            )
+            npt.assert_equal(
+                mtrace[varname, 2::2],
+                mtrace.get_values(varname, burn=2, thin=2, combine=True),
+            )
 
     def test_selection_method_equivalence(self):
         varname = self.mtrace.varnames[0]
         mtrace = self.mtrace
-        npt.assert_equal(mtrace.get_values(varname),
-                         mtrace[varname])
-        npt.assert_equal(mtrace[varname],
-                         mtrace.__getattr__(varname))
+        npt.assert_equal(mtrace.get_values(varname), mtrace[varname])
+        npt.assert_equal(mtrace[varname], mtrace.__getattr__(varname))
 
 
 class DumpLoadTestCase(ModelBackendSampledTestCase):
@@ -433,6 +474,7 @@ class DumpLoadTestCase(ModelBackendSampledTestCase):
     - name
     - shape
     """
+
     @classmethod
     def setup_class(cls):
         super(DumpLoadTestCase, cls).setup_class()
@@ -475,6 +517,7 @@ class BackendEqualityTestCase(ModelBackendSampledTestCase):
     - name1
     - shape
     """
+
     @classmethod
     def setup_class(cls):
         cls.backend = cls.backend0
@@ -497,18 +540,20 @@ def test_chain_length(self):
         assert self.mtrace0.nchains == self.mtrace1.nchains
         assert len(self.mtrace0) == len(self.mtrace1)
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_dtype(self):
         for varname in self.test_point.keys():
-            assert self.mtrace0.get_values(varname, chains=0).dtype == \
-                             self.mtrace1.get_values(varname, chains=0).dtype
+            assert (
+                self.mtrace0.get_values(varname, chains=0).dtype
+                == self.mtrace1.get_values(varname, chains=0).dtype
+            )
 
     def test_number_of_draws(self):
         for varname in self.test_point.keys():
-            values0 = self.mtrace0.get_values(varname, combine=False,
-                                              squeeze=False)
-            values1 = self.mtrace1.get_values(varname, combine=False,
-                                              squeeze=False)
+            values0 = self.mtrace0.get_values(varname, combine=False, squeeze=False)
+            values1 = self.mtrace1.get_values(varname, combine=False, squeeze=False)
             assert values0[0].shape[0] == self.draws
             assert values1[0].shape[0] == self.draws
 
@@ -519,59 +564,61 @@ def test_get_item(self):
     def test_get_values(self):
         for varname in self.test_point.keys():
             for cf in [False, True]:
-                npt.assert_equal(self.mtrace0.get_values(varname, combine=cf),
-                                 self.mtrace1.get_values(varname, combine=cf))
+                npt.assert_equal(
+                    self.mtrace0.get_values(varname, combine=cf),
+                    self.mtrace1.get_values(varname, combine=cf),
+                )
 
     def test_get_values_no_squeeze(self):
         for varname in self.test_point.keys():
-            npt.assert_equal(self.mtrace0.get_values(varname, combine=False,
-                                                     squeeze=False),
-                             self.mtrace1.get_values(varname, combine=False,
-                                                     squeeze=False))
+            npt.assert_equal(
+                self.mtrace0.get_values(varname, combine=False, squeeze=False),
+                self.mtrace1.get_values(varname, combine=False, squeeze=False),
+            )
 
     def test_get_values_combine_and_no_squeeze(self):
         for varname in self.test_point.keys():
-            npt.assert_equal(self.mtrace0.get_values(varname, combine=True,
-                                                     squeeze=False),
-                             self.mtrace1.get_values(varname, combine=True,
-                                                     squeeze=False))
+            npt.assert_equal(
+                self.mtrace0.get_values(varname, combine=True, squeeze=False),
+                self.mtrace1.get_values(varname, combine=True, squeeze=False),
+            )
 
     def test_get_values_with_burn(self):
         for varname in self.test_point.keys():
             for cf in [False, True]:
-                npt.assert_equal(self.mtrace0.get_values(varname, combine=cf,
-                                                         burn=3),
-                                 self.mtrace1.get_values(varname, combine=cf,
-                                                         burn=3))
+                npt.assert_equal(
+                    self.mtrace0.get_values(varname, combine=cf, burn=3),
+                    self.mtrace1.get_values(varname, combine=cf, burn=3),
+                )
                 # Burn to one value.
-                npt.assert_equal(self.mtrace0.get_values(varname, combine=cf,
-                                                         burn=self.draws - 1),
-                                 self.mtrace1.get_values(varname, combine=cf,
-                                                         burn=self.draws - 1))
+                npt.assert_equal(
+                    self.mtrace0.get_values(varname, combine=cf, burn=self.draws - 1),
+                    self.mtrace1.get_values(varname, combine=cf, burn=self.draws - 1),
+                )
 
     def test_get_values_with_thin(self):
         for varname in self.test_point.keys():
             for cf in [False, True]:
-                npt.assert_equal(self.mtrace0.get_values(varname, combine=cf,
-                                                         thin=2),
-                                 self.mtrace1.get_values(varname, combine=cf,
-                                                         thin=2))
+                npt.assert_equal(
+                    self.mtrace0.get_values(varname, combine=cf, thin=2),
+                    self.mtrace1.get_values(varname, combine=cf, thin=2),
+                )
 
     def test_get_values_with_burn_and_thin(self):
         for varname in self.test_point.keys():
             for cf in [False, True]:
-                npt.assert_equal(self.mtrace0.get_values(varname, combine=cf,
-                                                         burn=2, thin=2),
-                                 self.mtrace1.get_values(varname, combine=cf,
-                                                         burn=2, thin=2))
+                npt.assert_equal(
+                    self.mtrace0.get_values(varname, combine=cf, burn=2, thin=2),
+                    self.mtrace1.get_values(varname, combine=cf, burn=2, thin=2),
+                )
 
     def test_get_values_with_chains_arg(self):
         for varname in self.test_point.keys():
             for cf in [False, True]:
-                npt.assert_equal(self.mtrace0.get_values(varname, chains=[0],
-                                                         combine=cf),
-                                 self.mtrace1.get_values(varname, chains=[0],
-                                                         combine=cf))
+                npt.assert_equal(
+                    self.mtrace0.get_values(varname, chains=[0], combine=cf),
+                    self.mtrace1.get_values(varname, chains=[0], combine=cf),
+                )
 
     def test_get_point(self):
         npoint, spoint = self.mtrace0[4], self.mtrace1[4]
diff --git a/pymc3/tests/checks.py b/pymc3/tests/checks.py
index 292cc5fe7f..f8f2dae232 100644
--- a/pymc3/tests/checks.py
+++ b/pymc3/tests/checks.py
@@ -2,12 +2,12 @@
 
 
 def close_to(x, v, bound, name="value"):
-    assert np.all(np.logical_or(
-            np.abs(x - v) < bound,
-            x == v)), name + " out of bounds : " + repr(x) + ", " + repr(v) + ", " + repr(bound)
+    assert np.all(np.logical_or(np.abs(x - v) < bound, x == v)), (
+        name + " out of bounds : " + repr(x) + ", " + repr(v) + ", " + repr(bound)
+    )
 
 
-def close_to_logical(x, v, bound, name="value"):    
-    assert np.all(np.logical_or(
-            np.abs(np.bitwise_xor(x, v)) < bound,
-            x == v)), name + " out of bounds : " + repr(x) + ", " + repr(v) + ", " + repr(bound)
+def close_to_logical(x, v, bound, name="value"):
+    assert np.all(np.logical_or(np.abs(np.bitwise_xor(x, v)) < bound, x == v)), (
+        name + " out of bounds : " + repr(x) + ", " + repr(v) + ", " + repr(bound)
+    )
diff --git a/pymc3/tests/conftest.py b/pymc3/tests/conftest.py
index 481fcba133..d628ce58c7 100644
--- a/pymc3/tests/conftest.py
+++ b/pymc3/tests/conftest.py
@@ -6,31 +6,29 @@
 
 @pytest.fixture(scope="function", autouse=True)
 def theano_config():
-    config = theano.configparser.change_flags(compute_test_value='raise')
+    config = theano.configparser.change_flags(compute_test_value="raise")
     with config:
         yield
 
 
-@pytest.fixture(scope='function', autouse=True)
+@pytest.fixture(scope="function", autouse=True)
 def exception_verbosity():
-    config = theano.configparser.change_flags(
-        exception_verbosity='high')
+    config = theano.configparser.change_flags(exception_verbosity="high")
     with config:
         yield
 
 
-@pytest.fixture(scope='function', autouse=False)
+@pytest.fixture(scope="function", autouse=False)
 def strict_float32():
-    if theano.config.floatX == 'float32':
-        config = theano.configparser.change_flags(
-            warn_float64='raise')
+    if theano.config.floatX == "float32":
+        config = theano.configparser.change_flags(warn_float64="raise")
         with config:
             yield
     else:
         yield
 
 
-@pytest.fixture('function', autouse=False)
+@pytest.fixture("function", autouse=False)
 def seeded_test():
     # TODO: use this instead of SeededTest
     np.random.seed(42)
diff --git a/pymc3/tests/helpers.py b/pymc3/tests/helpers.py
index 621d9844c0..0a30e8a58b 100644
--- a/pymc3/tests/helpers.py
+++ b/pymc3/tests/helpers.py
@@ -51,7 +51,7 @@ def matches(self, **kwargs):
 
 class Matcher(object):
 
-    _partial_matches = ('msg', 'message')
+    _partial_matches = ("msg", "message")
 
     def matches(self, d, **kwargs):
         """
@@ -77,7 +77,7 @@ def match_value(self, k, dv, v):
         if type(v) != type(dv):
             result = False
         elif type(dv) is not str or k not in self._partial_matches:
-            result = (v == dv)
+            result = v == dv
         else:
             result = dv.find(v) >= 0
         return result
diff --git a/pymc3/tests/models.py b/pymc3/tests/models.py
index 117d847c3c..d30a93b182 100644
--- a/pymc3/tests/models.py
+++ b/pymc3/tests/models.py
@@ -12,16 +12,16 @@ def simple_model():
     mu = -2.1
     tau = 1.3
     with Model() as model:
-        Normal('x', mu, tau=tau, shape=2, testval=tt.ones(2) * .1)
+        Normal("x", mu, tau=tau, shape=2, testval=tt.ones(2) * 0.1)
 
-    return model.test_point, model, (mu, tau ** -.5)
+    return model.test_point, model, (mu, tau ** -0.5)
 
 
 def simple_categorical():
     p = floatX_array([0.1, 0.2, 0.3, 0.4])
     v = floatX_array([0.0, 1.0, 2.0, 3.0])
     with Model() as model:
-        Categorical('x', p, shape=3, testval=[1, 2, 3])
+        Categorical("x", p, shape=3, testval=[1, 2, 3])
 
     mu = np.dot(p, v)
     var = np.dot(p, (v - mu) ** 2)
@@ -32,9 +32,9 @@ def multidimensional_model():
     mu = -2.1
     tau = 1.3
     with Model() as model:
-        Normal('x', mu, tau=tau, shape=(3, 2), testval=.1 * tt.ones((3, 2)))
+        Normal("x", mu, tau=tau, shape=(3, 2), testval=0.1 * tt.ones((3, 2)))
 
-    return model.test_point, model, (mu, tau ** -.5)
+    return model.test_point, model, (mu, tau ** -0.5)
 
 
 def simple_arbitrary_det():
@@ -45,27 +45,27 @@ def arbitrary_det(value):
         return value
 
     with Model() as model:
-        a = Normal('a')
+        a = Normal("a")
         b = arbitrary_det(a)
-        Normal('obs', mu=b.astype('float64'), observed=floatX_array([1, 3, 5]))
+        Normal("obs", mu=b.astype("float64"), observed=floatX_array([1, 3, 5]))
 
     return model.test_point, model
 
 
 def simple_init():
     start, model, moments = simple_model()
-    step = Metropolis(model.vars, np.diag([1.]), model=model)
+    step = Metropolis(model.vars, np.diag([1.0]), model=model)
     return model, start, step, moments
 
 
 def simple_2model():
     mu = -2.1
     tau = 1.3
-    p = .4
+    p = 0.4
     with Model() as model:
-        x = pm.Normal('x', mu, tau=tau, testval=.1)
-        pm.Deterministic('logx', tt.log(x))
-        pm.Bernoulli('y', p)
+        x = pm.Normal("x", mu, tau=tau, testval=0.1)
+        pm.Deterministic("logx", tt.log(x))
+        pm.Bernoulli("y", p)
     return model.test_point, model
 
 
@@ -73,22 +73,24 @@ def simple_2model_continuous():
     mu = -2.1
     tau = 1.3
     with Model() as model:
-        x = pm.Normal('x', mu, tau=tau, testval=.1)
-        pm.Deterministic('logx', tt.log(x))
-        pm.Beta('y', alpha=1, beta=1, shape=2)
+        x = pm.Normal("x", mu, tau=tau, testval=0.1)
+        pm.Deterministic("logx", tt.log(x))
+        pm.Beta("y", alpha=1, beta=1, shape=2)
     return model.test_point, model
 
 
 def mv_simple():
-    mu = floatX_array([-.1, .5, 1.1])
-    p = floatX_array([
-        [2., 0, 0],
-        [.05, .1, 0],
-        [1., -0.05, 5.5]])
+    mu = floatX_array([-0.1, 0.5, 1.1])
+    p = floatX_array([[2.0, 0, 0], [0.05, 0.1, 0], [1.0, -0.05, 5.5]])
     tau = np.dot(p, p.T)
     with pm.Model() as model:
-        pm.MvNormal('x', tt.constant(mu), tau=tt.constant(tau),
-                    shape=3, testval=floatX_array([.1, 1., .8]))
+        pm.MvNormal(
+            "x",
+            tt.constant(mu),
+            tau=tt.constant(tau),
+            shape=3,
+            testval=floatX_array([0.1, 1.0, 0.8]),
+        )
     H = tau
     C = np.linalg.inv(H)
     return model.test_point, model, (mu, C)
@@ -97,9 +99,9 @@ def mv_simple():
 def mv_simple_discrete():
     d = 2
     n = 5
-    p = floatX_array([.15, .85])
+    p = floatX_array([0.15, 0.85])
     with pm.Model() as model:
-        pm.Multinomial('x', n, tt.constant(p), shape=d, testval=np.array([1, 4]))
+        pm.Multinomial("x", n, tt.constant(p), shape=d, testval=np.array([1, 4]))
         mu = n * p
         # covariance matrix
         C = np.zeros((d, d))
@@ -132,30 +134,29 @@ def mv_prior_simple():
     std_post = (K - np.dot(v.T, v)).diagonal() ** 0.5
 
     with pm.Model() as model:
-        x = pm.Flat('x', shape=n)
-        x_obs = pm.MvNormal('x_obs', observed=obs, mu=x,
-                            cov=noise * np.eye(n), shape=n)
+        x = pm.Flat("x", shape=n)
+        x_obs = pm.MvNormal("x_obs", observed=obs, mu=x, cov=noise * np.eye(n), shape=n)
 
     return model.test_point, model, (K, L, mu_post, std_post, noise)
 
 
 def non_normal(n=2):
     with pm.Model() as model:
-        pm.Beta('x', 3, 3, shape=n, transform=None)
-    return model.test_point, model, (np.tile([.5], n), None)
+        pm.Beta("x", 3, 3, shape=n, transform=None)
+    return model.test_point, model, (np.tile([0.5], n), None)
 
 
 def exponential_beta(n=2):
     with pm.Model() as model:
-        pm.Beta('x', 3, 1, shape=n, transform=None)
-        pm.Exponential('y', 1, shape=n, transform=None)
+        pm.Beta("x", 3, 1, shape=n, transform=None)
+        pm.Exponential("y", 1, shape=n, transform=None)
     return model.test_point, model, None
 
 
 def beta_bernoulli(n=2):
     with pm.Model() as model:
-        pm.Beta('x', 3, 1, shape=n, transform=None)
-        pm.Bernoulli('y', 0.5)
+        pm.Beta("x", 3, 1, shape=n, transform=None)
+        pm.Bernoulli("y", 0.5)
     return model.test_point, model, None
 
 
diff --git a/pymc3/tests/sampler_fixtures.py b/pymc3/tests/sampler_fixtures.py
index 78f7ee8c52..484de588a5 100644
--- a/pymc3/tests/sampler_fixtures.py
+++ b/pymc3/tests/sampler_fixtures.py
@@ -29,12 +29,12 @@ def test_kstest(self):
         for varname, cdf in self.cdfs.items():
             samples = self.samples[varname]
             if samples.ndim == 1:
-                t, p = stats.kstest(samples[::self.ks_thin], cdf=cdf)
+                t, p = stats.kstest(samples[:: self.ks_thin], cdf=cdf)
                 assert self.alpha < p
             elif samples.ndim == 2:
                 pvals = []
                 for samples_, cdf_ in zip(samples.T, cdf):
-                    t, p = stats.kstest(samples_[::self.ks_thin], cdf=cdf_)
+                    t, p = stats.kstest(samples_[:: self.ks_thin], cdf=cdf_)
                     pvals.append(p)
                 t, p = stats.combine_pvalues(pvals)
                 assert self.alpha < p
@@ -42,11 +42,10 @@ def test_kstest(self):
                 raise NotImplementedError()
 
 
-
 class UniformFixture(KnownMean, KnownVariance, KnownCDF):
-    means = {'a': 0}
-    variances = {'a': 1.0 / 3}
-    cdfs = {'a': stats.uniform(-1, 2).cdf}
+    means = {"a": 0}
+    variances = {"a": 1.0 / 3}
+    cdfs = {"a": stats.uniform(-1, 2).cdf}
 
     @classmethod
     def make_model(cls):
@@ -57,9 +56,9 @@ def make_model(cls):
 
 
 class NormalFixture(KnownMean, KnownVariance, KnownCDF):
-    means = {'a': 2 * np.ones(10)}
-    variances = {'a': 3 * np.ones(10)}
-    cdfs = {'a': [stats.norm(2, np.sqrt(3)).cdf for _ in range(10)]}
+    means = {"a": 2 * np.ones(10)}
+    variances = {"a": 3 * np.ones(10)}
+    cdfs = {"a": [stats.norm(2, np.sqrt(3)).cdf for _ in range(10)]}
 
     @classmethod
     def make_model(cls):
@@ -69,20 +68,19 @@ def make_model(cls):
 
 
 class BetaBinomialFixture(KnownCDF):
-    cdfs = {'p': [stats.beta(a, b).cdf
-                  for a, b in zip([1.5, 2.5, 10], [3.5, 10.5, 1])]}
+    cdfs = {"p": [stats.beta(a, b).cdf for a, b in zip([1.5, 2.5, 10], [3.5, 10.5, 1])]}
 
     @classmethod
     def make_model(cls):
         with pm.Model() as model:
-            p = pm.Beta("p", [0.5, 0.5, 1.], [0.5, 0.5, 1.], shape=3)
+            p = pm.Beta("p", [0.5, 0.5, 1.0], [0.5, 0.5, 1.0], shape=3)
             pm.Binomial("y", p=p, n=[4, 12, 9], observed=[1, 2, 9])
         return model
 
 
 class StudentTFixture(KnownMean, KnownCDF):
-    means = {'a': 0}
-    cdfs = {'a': stats.t(df=4).cdf}
+    means = {"a": 0}
+    cdfs = {"a": stats.t(df=4).cdf}
     ks_thin = 10
 
     @classmethod
@@ -94,14 +92,12 @@ def make_model(cls):
 
 class LKJCholeskyCovFixture(KnownCDF):
     cdfs = {
-        'log_stds': [stats.norm(loc=x, scale=x / 10.).cdf
-                     for x in [1, 2, 3, 4, 5]],
+        "log_stds": [stats.norm(loc=x, scale=x / 10.0).cdf for x in [1, 2, 3, 4, 5]],
         # The entries of the correlation matrix should follow
         # beta(eta - 1 + d/2, eta - 1 + d/2) on (-1, 1).
         # See https://arxiv.org/abs/1309.7268
-        'corr_entries_unit': [
-            stats.beta(3 - 1 + 2.5, 3 - 1 + 2.5).cdf
-            for _ in range(10)
+        "corr_entries_unit": [
+            stats.beta(3 - 1 + 2.5, 3 - 1 + 2.5).cdf for _ in range(10)
         ],
     }
 
@@ -109,15 +105,15 @@ class LKJCholeskyCovFixture(KnownCDF):
     def make_model(cls):
         with pm.Model() as model:
             sd_mu = np.array([1, 2, 3, 4, 5])
-            sd_dist = pm.Lognormal.dist(mu=sd_mu, sd=sd_mu / 10., shape=5)
-            chol_packed = pm.LKJCholeskyCov('chol_packed', eta=3, n=5, sd_dist=sd_dist)
+            sd_dist = pm.Lognormal.dist(mu=sd_mu, sd=sd_mu / 10.0, shape=5)
+            chol_packed = pm.LKJCholeskyCov("chol_packed", eta=3, n=5, sd_dist=sd_dist)
             chol = pm.expand_packed_triangular(5, chol_packed, lower=True)
             cov = tt.dot(chol, chol.T)
             stds = tt.sqrt(tt.diag(cov))
-            pm.Deterministic('log_stds', tt.log(stds))
+            pm.Deterministic("log_stds", tt.log(stds))
             corr = cov / stds[None, :] / stds[:, None]
             corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2
-            pm.Deterministic('corr_entries_unit', corr_entries_unit)
+            pm.Deterministic("corr_entries_unit", corr_entries_unit)
         return model
 
 
@@ -128,19 +124,21 @@ def setup_class(cls):
         cls.model = cls.make_model()
         with cls.model:
             cls.step = cls.make_step()
-            cls.trace = pm.sample(cls.n_samples, tune=cls.tune, step=cls.step, cores=cls.chains)
+            cls.trace = pm.sample(
+                cls.n_samples, tune=cls.tune, step=cls.step, cores=cls.chains
+            )
         cls.samples = {}
         for var in cls.model.unobserved_RVs:
             cls.samples[str(var)] = cls.trace.get_values(var, burn=cls.burn)
 
     def test_neff(self):
-        if hasattr(self, 'min_n_eff'):
-            n_eff = pm.effective_n(self.trace[self.burn:])
+        if hasattr(self, "min_n_eff"):
+            n_eff = pm.effective_n(self.trace[self.burn :])
             for var in n_eff:
                 npt.assert_array_less(self.min_n_eff, n_eff[var])
 
     def test_Rhat(self):
-        rhat = pm.gelman_rubin(self.trace[self.burn:])
+        rhat = pm.gelman_rubin(self.trace[self.burn :])
         for var in rhat:
             npt.assert_allclose(rhat[var], 1, rtol=0.01)
 
@@ -149,16 +147,16 @@ class NutsFixture(BaseSampler):
     @classmethod
     def make_step(cls):
         args = {}
-        if hasattr(cls, 'step_args'):
+        if hasattr(cls, "step_args"):
             args.update(cls.step_args)
-        if 'scaling' not in args:
+        if "scaling" not in args:
             _, step = pm.sampling.init_nuts(n_init=10000, **args)
         else:
             step = pm.NUTS(**args)
         return step
 
     def test_target_accept(self):
-        accept = self.trace[self.burn:]['mean_tree_accept']
+        accept = self.trace[self.burn :]["mean_tree_accept"]
         npt.assert_allclose(accept.mean(), self.step.target_accept, 1)
 
 
@@ -166,7 +164,7 @@ class MetropolisFixture(BaseSampler):
     @classmethod
     def make_step(cls):
         args = {}
-        if hasattr(cls, 'step_args'):
+        if hasattr(cls, "step_args"):
             args.update(cls.step_args)
         return pm.Metropolis(**args)
 
@@ -175,6 +173,6 @@ class SliceFixture(BaseSampler):
     @classmethod
     def make_step(cls):
         args = {}
-        if hasattr(cls, 'step_args'):
+        if hasattr(cls, "step_args"):
             args.update(cls.step_args)
         return pm.Slice(**args)
diff --git a/pymc3/tests/test_diagnostics.py b/pymc3/tests/test_diagnostics.py
index 35c8854adc..6caf258734 100644
--- a/pymc3/tests/test_diagnostics.py
+++ b/pymc3/tests/test_diagnostics.py
@@ -13,7 +13,9 @@
 import theano
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestGelmanRubin(SeededTest):
     good_ratio = 1.1
 
@@ -23,24 +25,29 @@ def get_ptrace(self, n_samples):
             # Run sampler
             step1 = Slice([model.early_mean_log__, model.late_mean_log__])
             step2 = Metropolis([model.switchpoint])
-            start = {'early_mean': 7., 'late_mean': 5., 'switchpoint': 10}
-            ptrace = sample(n_samples, tune=0, step=[step1, step2], start=start, cores=2,
-                            progressbar=False, random_seed=[20090425, 19700903])
+            start = {"early_mean": 7.0, "late_mean": 5.0, "switchpoint": 10}
+            ptrace = sample(
+                n_samples,
+                tune=0,
+                step=[step1, step2],
+                start=start,
+                cores=2,
+                progressbar=False,
+                random_seed=[20090425, 19700903],
+            )
         return ptrace
 
     def test_good(self):
         """Confirm Gelman-Rubin statistic is close to 1 for a reasonable number of samples."""
         n_samples = 1000
         rhat = gelman_rubin(self.get_ptrace(n_samples))
-        assert all(1 / self.good_ratio < r <
-                            self.good_ratio for r in rhat.values())
+        assert all(1 / self.good_ratio < r < self.good_ratio for r in rhat.values())
 
     def test_bad(self):
         """Confirm Gelman-Rubin statistic is far from 1 for a small number of samples."""
         n_samples = 5
         rhat = gelman_rubin(self.get_ptrace(n_samples))
-        assert not all(1 / self.good_ratio < r <
-                             self.good_ratio for r in rhat.values())
+        assert not all(1 / self.good_ratio < r < self.good_ratio for r in rhat.values())
 
     def test_right_shape_python_float(self, shape=None, test_shape=None):
         """Check Gelman-Rubin statistic shape is correct w/ python float"""
@@ -49,17 +56,18 @@ def test_right_shape_python_float(self, shape=None, test_shape=None):
 
         with Model():
             if shape is not None:
-                Normal('x', 0, 1., shape=shape)
+                Normal("x", 0, 1.0, shape=shape)
             else:
-                Normal('x', 0, 1.)
+                Normal("x", 0, 1.0)
 
             # start sampling at the MAP
             start = find_MAP()
             step = NUTS(scaling=start, step_scale=0.1)
-            ptrace = sample(n_samples, tune=0, step=step, start=start,
-                            chains=chains, random_seed=42)
+            ptrace = sample(
+                n_samples, tune=0, step=step, start=start, chains=chains, random_seed=42
+            )
 
-        rhat = gelman_rubin(ptrace)['x']
+        rhat = gelman_rubin(ptrace)["x"]
 
         if test_shape is None:
             test_shape = shape
@@ -87,19 +95,26 @@ def test_right_shape_scalar_one(self):
         self.test_right_shape_python_float(shape=1, test_shape=(1,))
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestDiagnostics(SeededTest):
-
     def get_switchpoint(self, n_samples, chains=1):
         model = build_disaster_model()
         with model:
             # Run sampler
             step1 = Slice([model.early_mean_log__, model.late_mean_log__])
             step2 = Metropolis([model.switchpoint])
-            trace = sample(0, tune=n_samples, step=[step1, step2],
-                           progressbar=False, random_seed=1,
-                           discard_tuned_samples=False, chains=chains)
-        return trace['switchpoint']
+            trace = sample(
+                0,
+                tune=n_samples,
+                step=[step1, step2],
+                progressbar=False,
+                random_seed=1,
+                discard_tuned_samples=False,
+                chains=chains,
+            )
+        return trace["switchpoint"]
 
     def test_geweke_negative(self):
         """Confirm Geweke diagnostic is larger than 1 for a small number of samples."""
@@ -110,8 +125,7 @@ def test_geweke_negative(self):
         last = 0.7
         # returns (intervalsx2) matrix, with first row start indexes, second
         # z-scores
-        z_switch = geweke(switchpoint, first=first,
-                          last=last, intervals=n_intervals)
+        z_switch = geweke(switchpoint, first=first, last=last, intervals=n_intervals)
 
         # These z-scores should be larger, since there are not many samples.
         assert max(abs(z_switch[:, 1])) > 1
@@ -135,8 +149,7 @@ def test_geweke_positive(self):
         last = 0.7
         # returns (intervalsx2) matrix, with first row start indexes, second
         # z-scores
-        z_switch = geweke(switchpoint, first=first,
-                          last=last, intervals=n_intervals)
+        z_switch = geweke(switchpoint, first=first, last=last, intervals=n_intervals)
         start = z_switch[:, 0]
         z_scores = z_switch[:, 1]
 
@@ -155,38 +168,49 @@ def test_effective_n(self):
         n_samples = 100
 
         with Model():
-            Normal('x', 0, 1., shape=5)
+            Normal("x", 0, 1.0, shape=5)
 
             # start sampling at the MAP
             start = find_MAP()
             step = NUTS(scaling=start)
-            ptrace = sample(0, tune=n_samples, step=step, start=start,
-                            cores=n_jobs, discard_tuned_samples=False,
-                            random_seed=42)
-
-        n_effective = effective_n(ptrace)['x']
+            ptrace = sample(
+                0,
+                tune=n_samples,
+                step=step,
+                start=start,
+                cores=n_jobs,
+                discard_tuned_samples=False,
+                random_seed=42,
+            )
+
+        n_effective = effective_n(ptrace)["x"]
         assert_allclose(n_effective, n_jobs * n_samples, 2)
 
-    def test_effective_n_right_shape_python_float(self,
-                                                  shape=None, test_shape=None):
+    def test_effective_n_right_shape_python_float(self, shape=None, test_shape=None):
         """Check effective sample size shape is correct w/ python float"""
         n_jobs = 3
         n_samples = 10
 
         with Model():
             if shape is not None:
-                Normal('x', 0, 1., shape=shape)
+                Normal("x", 0, 1.0, shape=shape)
             else:
-                Normal('x', 0, 1.)
+                Normal("x", 0, 1.0)
 
             # start sampling at the MAP
             start = find_MAP()
             step = NUTS(scaling=start)
-            ptrace = sample(0, tune=n_samples, step=step, start=start,
-                            cores=n_jobs, discard_tuned_samples=False,
-                            random_seed=42)
-
-        n_effective = effective_n(ptrace)['x']
+            ptrace = sample(
+                0,
+                tune=n_samples,
+                step=step,
+                start=start,
+                cores=n_jobs,
+                discard_tuned_samples=False,
+                random_seed=42,
+            )
+
+        n_effective = effective_n(ptrace)["x"]
 
         if test_shape is None:
             test_shape = shape
@@ -211,5 +235,4 @@ def test_effective_n_right_shape_scalar_array(self):
 
     def test_effective_n_right_shape_scalar_one(self):
         """Check effective sample size shape is correct w/ scalar as shape=1"""
-        self.test_effective_n_right_shape_python_float(shape=1,
-                                                       test_shape=(1,))
+        self.test_effective_n_right_shape_python_float(shape=1, test_shape=(1,))
diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py
index 57e6fdd19a..df07e69e74 100644
--- a/pymc3/tests/test_dist_math.py
+++ b/pymc3/tests/test_dist_math.py
@@ -10,7 +10,12 @@
 from ..theanof import floatX
 from ..distributions import Discrete
 from ..distributions.dist_math import (
-    bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper)
+    bound,
+    factln,
+    alltrue_scalar,
+    MvNormalLogp,
+    SplineWrapper,
+)
 
 
 def test_bound():
@@ -36,36 +41,29 @@ def test_bound():
     assert not np.all(bound(logp, cond).eval() == 1)
     assert np.prod(bound(logp, cond).eval()) == -np.inf
 
+
 def test_alltrue_scalar():
     assert alltrue_scalar([]).eval()
     assert alltrue_scalar([True]).eval()
     assert alltrue_scalar([tt.ones(10)]).eval()
-    assert alltrue_scalar([tt.ones(10),
-                    5 * tt.ones(101)]).eval()
-    assert alltrue_scalar([np.ones(10),
-                    5 * tt.ones(101)]).eval()
-    assert alltrue_scalar([np.ones(10),
-                    True,
-                    5 * tt.ones(101)]).eval()
-    assert alltrue_scalar([np.array([1, 2, 3]),
-                    True,
-                    5 * tt.ones(101)]).eval()
+    assert alltrue_scalar([tt.ones(10), 5 * tt.ones(101)]).eval()
+    assert alltrue_scalar([np.ones(10), 5 * tt.ones(101)]).eval()
+    assert alltrue_scalar([np.ones(10), True, 5 * tt.ones(101)]).eval()
+    assert alltrue_scalar([np.array([1, 2, 3]), True, 5 * tt.ones(101)]).eval()
 
     assert not alltrue_scalar([False]).eval()
     assert not alltrue_scalar([tt.zeros(10)]).eval()
-    assert not alltrue_scalar([True,
-                        False]).eval()
-    assert not alltrue_scalar([np.array([0, -1]),
-                        tt.ones(60)]).eval()
-    assert not alltrue_scalar([np.ones(10),
-                        False,
-                        5 * tt.ones(101)]).eval()
+    assert not alltrue_scalar([True, False]).eval()
+    assert not alltrue_scalar([np.array([0, -1]), tt.ones(60)]).eval()
+    assert not alltrue_scalar([np.ones(10), False, 5 * tt.ones(101)]).eval()
+
 
 def test_alltrue_shape():
     vals = [True, tt.ones(10), tt.zeros(5)]
 
     assert alltrue_scalar(vals).eval().shape == ()
 
+
 class MultinomialA(Discrete):
     def __init__(self, n, p, *args, **kwargs):
         super(MultinomialA, self).__init__(*args, **kwargs)
@@ -77,11 +75,13 @@ def logp(self, value):
         n = self.n
         p = self.p
 
-        return bound(factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
-                     value >= 0,
-                     0 <= p, p <= 1,
-                     tt.isclose(p.sum(), 1),
-                     broadcast_conditions=False
+        return bound(
+            factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
+            value >= 0,
+            0 <= p,
+            p <= 1,
+            tt.isclose(p.sum(), 1),
+            broadcast_conditions=False,
         )
 
 
@@ -96,11 +96,13 @@ def logp(self, value):
         n = self.n
         p = self.p
 
-        return bound(factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
-                     tt.all(value >= 0),
-                     tt.all(0 <= p), tt.all(p <= 1),
-                     tt.isclose(p.sum(), 1),
-                     broadcast_conditions=False
+        return bound(
+            factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
+            tt.all(value >= 0),
+            tt.all(0 <= p),
+            tt.all(p <= 1),
+            tt.isclose(p.sum(), 1),
+            broadcast_conditions=False,
         )
 
 
@@ -110,27 +112,28 @@ def test_multinomial_bound():
     n = x.sum()
 
     with pm.Model() as modelA:
-        p_a = pm.Dirichlet('p', floatX(np.ones(2)))
-        MultinomialA('x', n, p_a, observed=x)
+        p_a = pm.Dirichlet("p", floatX(np.ones(2)))
+        MultinomialA("x", n, p_a, observed=x)
 
     with pm.Model() as modelB:
-        p_b = pm.Dirichlet('p', floatX(np.ones(2)))
-        MultinomialB('x', n, p_b, observed=x)
+        p_b = pm.Dirichlet("p", floatX(np.ones(2)))
+        MultinomialB("x", n, p_b, observed=x)
 
-    assert np.isclose(modelA.logp({'p_stickbreaking__': [0]}),
-                      modelB.logp({'p_stickbreaking__': [0]}))
+    assert np.isclose(
+        modelA.logp({"p_stickbreaking__": [0]}), modelB.logp({"p_stickbreaking__": [0]})
+    )
 
 
-class TestMvNormalLogp():
+class TestMvNormalLogp:
     def test_logp(self):
         np.random.seed(42)
 
         chol_val = floatX(np.array([[1, 0.9], [0, 2]]))
         cov_val = floatX(np.dot(chol_val, chol_val.T))
-        cov = tt.matrix('cov')
+        cov = tt.matrix("cov")
         cov.tag.test_value = cov_val
         delta_val = floatX(np.random.randn(5, 2))
-        delta = tt.matrix('delta')
+        delta = tt.matrix("delta")
         delta.tag.test_value = delta_val
         expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
         expect = expect.logpdf(delta_val).sum()
@@ -144,14 +147,16 @@ def test_grad(self):
         np.random.seed(42)
 
         def func(chol_vec, delta):
-            chol = tt.stack([
-                tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
-                tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
-            ])
+            chol = tt.stack(
+                [
+                    tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
+                    tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
+                ]
+            )
             cov = tt.dot(chol, chol.T)
             return MvNormalLogp()(cov, delta)
 
-        chol_vec_val = floatX(np.array([0.5, 1., -0.1]))
+        chol_vec_val = floatX(np.array([0.5, 1.0, -0.1]))
 
         delta_val = floatX(np.random.randn(1, 2))
         utt.verify_grad(func, [chol_vec_val, delta_val])
@@ -162,14 +167,16 @@ def func(chol_vec, delta):
     @pytest.mark.skip(reason="Fix in theano not released yet: Theano#5908")
     @theano.configparser.change_flags(compute_test_value="ignore")
     def test_hessian(self):
-        chol_vec = tt.vector('chol_vec')
+        chol_vec = tt.vector("chol_vec")
         chol_vec.tag.test_value = np.array([0.1, 2, 3])
-        chol = tt.stack([
-            tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
-            tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
-        ])
+        chol = tt.stack(
+            [
+                tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
+                tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
+            ]
+        )
         cov = tt.dot(chol, chol.T)
-        delta = tt.matrix('delta')
+        delta = tt.matrix("delta")
         delta.tag.test_value = np.ones((5, 2))
         logp = MvNormalLogp()(cov, delta)
         g_cov, g_delta = tt.grad(logp, [cov, delta])
@@ -189,7 +196,7 @@ def test_hessian(self):
         x = np.linspace(0, 1, 100)
         y = x * x
         spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1))
-        x_var = tt.dscalar('x')
+        x_var = tt.dscalar("x")
         g_x, = tt.grad(spline(x_var), [x_var])
         with pytest.raises(NotImplementedError):
             tt.grad(g_x, [x_var])
diff --git a/pymc3/tests/test_distribution_defaults.py b/pymc3/tests/test_distribution_defaults.py
index 331872e04c..7b536d66e1 100644
--- a/pymc3/tests/test_distribution_defaults.py
+++ b/pymc3/tests/test_distribution_defaults.py
@@ -8,7 +8,6 @@
 
 
 class DistTest(Continuous):
-
     def __init__(self, a, b, *args, **kwargs):
         super(DistTest, self).__init__(*args, **kwargs)
         self.a = a
@@ -20,52 +19,53 @@ def logp(self, v):
 
 def test_default_nan_fail():
     with Model(), pytest.raises(AttributeError):
-        DistTest('x', np.nan, 2, defaults=['a'])
+        DistTest("x", np.nan, 2, defaults=["a"])
 
 
 def test_default_empty_fail():
     with Model(), pytest.raises(AttributeError):
-        DistTest('x', 1, 2, defaults=[])
+        DistTest("x", 1, 2, defaults=[])
 
 
 def test_default_testval():
     with Model():
-        x = DistTest('x', 1, 2, testval=5, defaults=[])
+        x = DistTest("x", 1, 2, testval=5, defaults=[])
         assert x.tag.test_value == 5
 
 
 def test_default_testval_nan():
     with Model():
-        x = DistTest('x', 1, 2, testval=np.nan, defaults=['a'])
+        x = DistTest("x", 1, 2, testval=np.nan, defaults=["a"])
         np.testing.assert_almost_equal(x.tag.test_value, np.nan)
 
 
 def test_default_a():
     with Model():
-        x = DistTest('x', 1, 2, defaults=['a'])
+        x = DistTest("x", 1, 2, defaults=["a"])
         assert x.tag.test_value == 1
 
 
 def test_default_b():
     with Model():
-        x = DistTest('x', np.nan, 2, defaults=['a', 'b'])
+        x = DistTest("x", np.nan, 2, defaults=["a", "b"])
         assert x.tag.test_value == 2
 
 
 def test_default_c():
     with Model():
-        y = DistTest('y', 7, 8, testval=94)
-        x = DistTest('x', y, 2, defaults=['a', 'b'])
+        y = DistTest("y", 7, 8, testval=94)
+        x = DistTest("x", y, 2, defaults=["a", "b"])
         assert x.tag.test_value == 94
 
 
 def test_default_discrete_uniform():
     with Model():
-        x = DiscreteUniform('x', lower=1, upper=2)
+        x = DiscreteUniform("x", lower=1, upper=2)
         assert x.init_value == 1
 
+
 def test_discrete_uniform_negative():
     model = Model()
     with model:
-        x = DiscreteUniform('x', lower=-10, upper=0)
-    assert model.test_point['x'] == -5
+        x = DiscreteUniform("x", lower=-10, upper=0)
+    assert model.test_point["x"] == -5
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 783cd8e8e0..5891b34be5 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -8,17 +8,60 @@
 from ..model import Model, Point, Potential, Deterministic
 from ..blocking import DictToVarBijection, DictToArrayBijection, ArrayOrdering
 from ..distributions import (
-    DensityDist, Categorical, Multinomial, VonMises, Dirichlet,
-    MvStudentT, MvNormal, MatrixNormal, ZeroInflatedPoisson,
-    ZeroInflatedNegativeBinomial, Constant, Poisson, Bernoulli, Beta,
-    BetaBinomial, HalfStudentT, StudentT, Weibull, Pareto,
-    InverseGamma, Gamma, Cauchy, HalfCauchy, Lognormal, Laplace,
-    NegativeBinomial, Geometric, Exponential, ExGaussian, Normal, TruncatedNormal,
-    Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform,
-    Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull,
-    Gumbel, Logistic, OrderedLogistic, LogitNormal, Interpolated,
-    ZeroInflatedBinomial, HalfFlat, AR1, KroneckerNormal, Rice,
-    Kumaraswamy
+    DensityDist,
+    Categorical,
+    Multinomial,
+    VonMises,
+    Dirichlet,
+    MvStudentT,
+    MvNormal,
+    MatrixNormal,
+    ZeroInflatedPoisson,
+    ZeroInflatedNegativeBinomial,
+    Constant,
+    Poisson,
+    Bernoulli,
+    Beta,
+    BetaBinomial,
+    HalfStudentT,
+    StudentT,
+    Weibull,
+    Pareto,
+    InverseGamma,
+    Gamma,
+    Cauchy,
+    HalfCauchy,
+    Lognormal,
+    Laplace,
+    NegativeBinomial,
+    Geometric,
+    Exponential,
+    ExGaussian,
+    Normal,
+    TruncatedNormal,
+    Flat,
+    LKJCorr,
+    Wald,
+    ChiSquared,
+    HalfNormal,
+    DiscreteUniform,
+    Bound,
+    Uniform,
+    Triangular,
+    Binomial,
+    SkewNormal,
+    DiscreteWeibull,
+    Gumbel,
+    Logistic,
+    OrderedLogistic,
+    LogitNormal,
+    Interpolated,
+    ZeroInflatedBinomial,
+    HalfFlat,
+    AR1,
+    KroneckerNormal,
+    Rice,
+    Kumaraswamy,
 )
 
 from ..distributions import continuous
@@ -37,6 +80,7 @@
 import theano.tensor as tt
 from ..math import kronecker
 
+
 def get_lkj_cases():
     """
     Log probabilities calculated using the formulas in:
@@ -48,7 +92,7 @@ def get_lkj_cases():
         (tri, 3, 3, -7.7963493376312742),
         (tri, 0, 3, -np.inf),
         (np.array([1.1, 0.0, -0.7]), 1, 3, -np.inf),
-        (np.array([0.7, 0.0, -1.1]), 1, 3, -np.inf)
+        (np.array([0.7, 0.0, -1.1]), 1, 3, -np.inf),
     ]
 
 
@@ -58,7 +102,7 @@ def get_lkj_cases():
 class Domain(object):
     def __init__(self, vals, dtype=None, edges=None, shape=None):
         avals = array(vals, dtype=dtype)
-        if dtype is None and not str(avals.dtype).startswith('int'):
+        if dtype is None and not str(avals.dtype).startswith("int"):
             avals = avals.astype(theano.config.floatX)
         vals = [array(v, dtype=avals.dtype) for v in vals]
 
@@ -79,7 +123,8 @@ def __add__(self, other):
             [v + other for v in self.vals],
             self.dtype,
             (self.lower + other, self.upper + other),
-            self.shape)
+            self.shape,
+        )
 
     def __mul__(self, other):
         try:
@@ -87,20 +132,20 @@ def __mul__(self, other):
                 [v * other for v in self.vals],
                 self.dtype,
                 (self.lower * other, self.upper * other),
-                self.shape)
+                self.shape,
+            )
         except TypeError:
             return Domain(
                 [v * other for v in self.vals],
                 self.dtype,
                 (self.lower, self.upper),
-                self.shape)
+                self.shape,
+            )
 
     def __neg__(self):
         return Domain(
-            [-v for v in self.vals],
-            self.dtype,
-            (-self.lower, -self.upper),
-            self.shape)
+            [-v for v in self.vals], self.dtype, (-self.lower, -self.upper), self.shape
+        )
 
 
 def product(domains, n_samples=-1):
@@ -118,33 +163,35 @@ def product(domains, n_samples=-1):
         names, domains = zip(*domains.items())
     except ValueError:  # domains.items() is empty
         return []
-    all_vals = [zip(names, val) for val in itertools.product(*[d.vals for d in domains])]
+    all_vals = [
+        zip(names, val) for val in itertools.product(*[d.vals for d in domains])
+    ]
     if n_samples > 0 and len(all_vals) > n_samples:
-            return (all_vals[j] for j in nr.choice(len(all_vals), n_samples, replace=False))
+        return (all_vals[j] for j in nr.choice(len(all_vals), n_samples, replace=False))
     return all_vals
 
 
-R = Domain([-inf, -2.1, -1, -.01, .0, .01, 1, 2.1, inf])
-Rplus = Domain([0, .01, .1, .9, .99, 1, 1.5, 2, 100, inf])
-Rplusbig = Domain([0, .5, .9, .99, 1, 1.5, 2, 20, inf])
-Rminusbig = Domain([-inf, -2, -1.5, -1, -.99, -.9, -.5, -0.01, 0])
-Unit = Domain([0, .001, .1, .5, .75, .99, 1])
+R = Domain([-inf, -2.1, -1, -0.01, 0.0, 0.01, 1, 2.1, inf])
+Rplus = Domain([0, 0.01, 0.1, 0.9, 0.99, 1, 1.5, 2, 100, inf])
+Rplusbig = Domain([0, 0.5, 0.9, 0.99, 1, 1.5, 2, 20, inf])
+Rminusbig = Domain([-inf, -2, -1.5, -1, -0.99, -0.9, -0.5, -0.01, 0])
+Unit = Domain([0, 0.001, 0.1, 0.5, 0.75, 0.99, 1])
 
-Circ = Domain([-np.pi, -2.1, -1, -.01, .0, .01, 1, 2.1, np.pi])
+Circ = Domain([-np.pi, -2.1, -1, -0.01, 0.0, 0.01, 1, 2.1, np.pi])
 
-Runif = Domain([-1, -.4, 0, .4, 1])
-Rdunif = Domain([-10, 0, 10.])
-Rplusunif = Domain([0, .5, inf])
-Rplusdunif = Domain([2, 10, 100], 'int64')
+Runif = Domain([-1, -0.4, 0, 0.4, 1])
+Rdunif = Domain([-10, 0, 10.0])
+Rplusunif = Domain([0, 0.5, inf])
+Rplusdunif = Domain([2, 10, 100], "int64")
 
-I = Domain([-1000, -3, -2, -1, 0, 1, 2, 3, 1000], 'int64')
+I = Domain([-1000, -3, -2, -1, 0, 1, 2, 3, 1000], "int64")
 
-NatSmall = Domain([0, 3, 4, 5, 1000], 'int64')
-Nat = Domain([0, 1, 2, 3, 2000], 'int64')
-NatBig = Domain([0, 1, 2, 3, 5000, 50000], 'int64')
-PosNat = Domain([1, 2, 3, 2000], 'int64')
+NatSmall = Domain([0, 3, 4, 5, 1000], "int64")
+Nat = Domain([0, 1, 2, 3, 2000], "int64")
+NatBig = Domain([0, 1, 2, 3, 5000, 50000], "int64")
+PosNat = Domain([1, 2, 3, 2000], "int64")
 
-Bool = Domain([0, 0, 1, 1], 'int64')
+Bool = Domain([0, 0, 1, 1], "int64")
 
 
 def build_model(distfam, valuedomain, vardomains, extra_args=None):
@@ -153,10 +200,9 @@ def build_model(distfam, valuedomain, vardomains, extra_args=None):
     with Model() as m:
         vals = {}
         for v, dom in vardomains.items():
-            vals[v] = Flat(v, dtype=dom.dtype, shape=dom.shape,
-                           testval=dom.vals[0])
+            vals[v] = Flat(v, dtype=dom.dtype, shape=dom.shape, testval=dom.vals[0])
         vals.update(extra_args)
-        distfam('value', shape=valuedomain.shape, transform=None, **vals)
+        distfam("value", shape=valuedomain.shape, transform=None, **vals)
     return m
 
 
@@ -167,21 +213,31 @@ def integrate_nd(f, domain, shape, dtype):
         else:
             return sum(f(j) for j in range(domain.lower, domain.upper + 1))
     elif shape == (2,):
+
         def f2(a, b):
             return f([a, b])
 
-        return integrate.dblquad(f2, domain.lower[0], domain.upper[0],
-                                 lambda _: domain.lower[1],
-                                 lambda _: domain.upper[1])[0]
+        return integrate.dblquad(
+            f2,
+            domain.lower[0],
+            domain.upper[0],
+            lambda _: domain.lower[1],
+            lambda _: domain.upper[1],
+        )[0]
     elif shape == (3,):
+
         def f3(a, b, c):
             return f([a, b, c])
 
-        return integrate.tplquad(f3, domain.lower[0], domain.upper[0],
-                                 lambda _: domain.lower[1],
-                                 lambda _: domain.upper[1],
-                                 lambda _, __: domain.lower[2],
-                                 lambda _, __: domain.upper[2])[0]
+        return integrate.tplquad(
+            f3,
+            domain.lower[0],
+            domain.upper[0],
+            lambda _: domain.lower[1],
+            lambda _: domain.upper[1],
+            lambda _, __: domain.lower[2],
+            lambda _, __: domain.upper[2],
+        )[0]
     else:
         raise ValueError("Dont know how to integrate shape: " + str(shape))
 
@@ -197,7 +253,7 @@ def multinomial_logpdf(value, n, p):
 
 
 def beta_mu_sd(value, mu, sd):
-    kappa = mu * (1 - mu) / sd**2 - 1
+    kappa = mu * (1 - mu) / sd ** 2 - 1
     if kappa > 0:
         return sp.beta.logpdf(value, mu * kappa, (1 - mu) * kappa)
     else:
@@ -271,14 +327,15 @@ def matrix_normal_logpdf_cov(value, mu, rowcov, colcov):
 
 
 def matrix_normal_logpdf_chol(value, mu, rowchol, colchol):
-    return matrix_normal_logpdf_cov(value, mu, np.dot(rowchol, rowchol.T),
-                                    np.dot(colchol, colchol.T))
+    return matrix_normal_logpdf_cov(
+        value, mu, np.dot(rowchol, rowchol.T), np.dot(colchol, colchol.T)
+    )
 
 
 def kron_normal_logpdf_cov(value, mu, covs, sigma):
     cov = kronecker(*covs).eval()
     if sigma is not None:
-        cov += sigma**2 * np.eye(*cov.shape)
+        cov += sigma ** 2 * np.eye(*cov.shape)
     return scipy.stats.multivariate_normal.logpdf(value, mu, cov).sum()
 
 
@@ -311,8 +368,11 @@ def logpow(v, p):
 
 
 def discrete_weibull_logpmf(value, q, beta):
-    return floatX(np.log(np.power(q, np.power(value, beta))
-                  - np.power(q, np.power(value + 1, beta))))
+    return floatX(
+        np.log(
+            np.power(q, np.power(value, beta)) - np.power(q, np.power(value + 1, beta))
+        )
+    )
 
 
 def dirichlet_logpdf(value, a):
@@ -325,6 +385,7 @@ def categorical_logpdf(value, p):
     else:
         return -inf
 
+
 def mvt_logpdf(value, nu, Sigma, mu=0):
     d = len(Sigma)
     dist = np.atleast_2d(value) - mu
@@ -333,22 +394,28 @@ def mvt_logpdf(value, nu, Sigma, mu=0):
     logdet = np.log(np.diag(chol)).sum()
 
     lgamma = scipy.special.gammaln
-    norm = lgamma((nu + d) / 2.)  - 0.5 * d * np.log(nu * np.pi) - lgamma(nu / 2.)
-    logp = norm - logdet - (nu + d) / 2. * np.log1p((trafo * trafo).sum(-1) / nu)
+    norm = lgamma((nu + d) / 2.0) - 0.5 * d * np.log(nu * np.pi) - lgamma(nu / 2.0)
+    logp = norm - logdet - (nu + d) / 2.0 * np.log1p((trafo * trafo).sum(-1) / nu)
     return logp.sum()
 
+
 def AR1_logpdf(value, k, tau_e):
-    return (sp.norm(loc=0,scale=1/np.sqrt(tau_e)).logpdf(value[0]) +
-            sp.norm(loc=k*value[:-1],scale=1/np.sqrt(tau_e)).logpdf(value[1:]).sum())
+    return (
+        sp.norm(loc=0, scale=1 / np.sqrt(tau_e)).logpdf(value[0])
+        + sp.norm(loc=k * value[:-1], scale=1 / np.sqrt(tau_e)).logpdf(value[1:]).sum()
+    )
+
 
 def invlogit(x, eps=sys.float_info.epsilon):
-    return (1. - 2. * eps) / (1. + np.exp(-x)) + eps
+    return (1.0 - 2.0 * eps) / (1.0 + np.exp(-x)) + eps
+
 
 def orderedlogistic_logpdf(value, eta, cutpoints):
     c = np.concatenate(([-np.inf], cutpoints, [np.inf]))
     p = invlogit(eta - c[value]) - invlogit(eta - c[value + 1])
     return np.log(p)
 
+
 class Simplex(object):
     def __init__(self, n):
         self.vals = list(simplex_values(n))
@@ -359,7 +426,9 @@ def __init__(self, n):
 class MultiSimplex(object):
     def __init__(self, n_dependent, n_independent):
         self.vals = []
-        for simplex_value in itertools.product(simplex_values(n_dependent), repeat=n_independent):
+        for simplex_value in itertools.product(
+            simplex_values(n_dependent), repeat=n_independent
+        ):
             self.vals.append(np.vstack(simplex_value))
         self.shape = (n_independent, n_dependent)
         self.dtype = Unit.dtype
@@ -375,18 +444,21 @@ def PdMatrix(n):
     else:
         raise ValueError("n out of bounds")
 
-PdMatrix1 = Domain([np.eye(1), [[.5]]], edges=(None, None))
 
-PdMatrix2 = Domain([np.eye(2), [[.5, .05], [.05, 4.5]]], edges=(None, None))
+PdMatrix1 = Domain([np.eye(1), [[0.5]]], edges=(None, None))
+
+PdMatrix2 = Domain([np.eye(2), [[0.5, 0.05], [0.05, 4.5]]], edges=(None, None))
 
 PdMatrix3 = Domain(
-    [np.eye(3), [[.5, .1, 0], [.1, 1, 0], [0, 0, 2.5]]], edges=(None, None))
+    [np.eye(3), [[0.5, 0.1, 0], [0.1, 1, 0], [0, 0, 2.5]]], edges=(None, None)
+)
 
 
 PdMatrixChol1 = Domain([np.eye(1), [[0.001]]], edges=(None, None))
 PdMatrixChol2 = Domain([np.eye(2), [[0.1, 0], [10, 1]]], edges=(None, None))
-PdMatrixChol3 = Domain([np.eye(3), [[0.1, 0, 0], [10, 100, 0], [0, 1, 10]]],
-                       edges=(None, None))
+PdMatrixChol3 = Domain(
+    [np.eye(3), [[0.1, 0, 0], [10, 100, 0], [0, 1, 10]]], edges=(None, None)
+)
 
 
 def PdMatrixChol(n):
@@ -402,8 +474,9 @@ def PdMatrixChol(n):
 
 PdMatrixCholUpper1 = Domain([np.eye(1), [[0.001]]], edges=(None, None))
 PdMatrixCholUpper2 = Domain([np.eye(2), [[0.1, 10], [0, 1]]], edges=(None, None))
-PdMatrixCholUpper3 = Domain([np.eye(3), [[0.1, 10, 0], [0, 100, 1], [0, 0, 10]]],
-                            edges=(None, None))
+PdMatrixCholUpper3 = Domain(
+    [np.eye(3), [[0.1, 10, 0], [0, 100, 1], [0, 0, 10]]], edges=(None, None)
+)
 
 
 def PdMatrixCholUpper(n):
@@ -423,42 +496,61 @@ def RandomPdMatrix(n):
 
 
 class TestMatchesScipy(SeededTest):
-    def pymc3_matches_scipy(self, pymc3_dist, domain, paramdomains, scipy_dist,
-                            decimal=None, extra_args=None, scipy_args=None):
+    def pymc3_matches_scipy(
+        self,
+        pymc3_dist,
+        domain,
+        paramdomains,
+        scipy_dist,
+        decimal=None,
+        extra_args=None,
+        scipy_args=None,
+    ):
         if extra_args is None:
             extra_args = {}
         if scipy_args is None:
             scipy_args = {}
         model = build_model(pymc3_dist, domain, paramdomains, extra_args)
-        value = model.named_vars['value']
+        value = model.named_vars["value"]
 
         def logp(args):
             args.update(scipy_args)
             return scipy_dist(**args)
+
         self.check_logp(model, value, domain, paramdomains, logp, decimal=decimal)
 
-    def check_logp(self, model, value, domain, paramdomains, logp_reference, decimal=None):
+    def check_logp(
+        self, model, value, domain, paramdomains, logp_reference, decimal=None
+    ):
         domains = paramdomains.copy()
-        domains['value'] = domain
+        domains["value"] = domain
         logp = model.fastlogp
         for pt in product(domains, n_samples=100):
             pt = Point(pt, model=model)
             if decimal is None:
                 decimal = select_by_precision(float64=6, float32=3)
-            assert_almost_equal(logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt))
+            assert_almost_equal(
+                logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt)
+            )
 
-    def check_logcdf(self, pymc3_dist, domain, paramdomains, scipy_logcdf, decimal=None):
+    def check_logcdf(
+        self, pymc3_dist, domain, paramdomains, scipy_logcdf, decimal=None
+    ):
         domains = paramdomains.copy()
-        domains['value'] = domain
+        domains["value"] = domain
         if decimal is None:
             decimal = select_by_precision(float64=6, float32=3)
         for pt in product(domains, n_samples=100):
             params = dict(pt)
             scipy_cdf = scipy_logcdf(**params)
-            value = params.pop('value')
+            value = params.pop("value")
             dist = pymc3_dist.dist(**params)
-            assert_almost_equal(dist.logcdf(value).tag.test_value, scipy_cdf,
-                                decimal=decimal, err_msg=str(pt))
+            assert_almost_equal(
+                dist.logcdf(value).tag.test_value,
+                scipy_cdf,
+                decimal=decimal,
+                err_msg=str(pt),
+            )
 
     def check_int_to_1(self, model, value, domain, paramdomains):
         pdf = model.fastfn(exp(model.logpt))
@@ -478,9 +570,8 @@ def check_dlogp(self, model, value, domain, paramdomains):
             return
 
         domains = paramdomains.copy()
-        domains['value'] = domain
-        bij = DictToArrayBijection(
-            ArrayOrdering(model.cont_vars), model.test_point)
+        domains["value"] = domain
+        bij = DictToArrayBijection(ArrayOrdering(model.cont_vars), model.test_point)
         dlogp = bij.mapf(model.fastdlogp(model.cont_vars))
         logp = bij.mapf(model.fastlogp)
 
@@ -495,7 +586,9 @@ def wrapped_logp(x):
             pt = Point(pt, model=model)
             pt = bij.map(pt)
             decimals = select_by_precision(float64=6, float32=4)
-            assert_almost_equal(dlogp(pt), ndlogp(pt), decimal=decimals, err_msg=str(pt))
+            assert_almost_equal(
+                dlogp(pt), ndlogp(pt), decimal=decimals, err_msg=str(pt)
+            )
 
     def checkd(self, distfam, valuedomain, vardomains, checks=None, extra_args=None):
         if checks is None:
@@ -505,328 +598,547 @@ def checkd(self, distfam, valuedomain, vardomains, checks=None, extra_args=None)
             extra_args = {}
         m = build_model(distfam, valuedomain, vardomains, extra_args=extra_args)
         for check in checks:
-            check(m, m.named_vars['value'], valuedomain, vardomains)
+            check(m, m.named_vars["value"], valuedomain, vardomains)
 
     def test_uniform(self):
         self.pymc3_matches_scipy(
-            Uniform, Runif, {'lower': -Rplusunif, 'upper': Rplusunif},
-            lambda value, lower, upper: sp.uniform.logpdf(value, lower, upper - lower))
-        self.check_logcdf(Uniform, Runif, {'lower': -Rplusunif, 'upper': Rplusunif},
-                          lambda value, lower, upper: sp.uniform.logcdf(value, lower, upper - lower))
+            Uniform,
+            Runif,
+            {"lower": -Rplusunif, "upper": Rplusunif},
+            lambda value, lower, upper: sp.uniform.logpdf(value, lower, upper - lower),
+        )
+        self.check_logcdf(
+            Uniform,
+            Runif,
+            {"lower": -Rplusunif, "upper": Rplusunif},
+            lambda value, lower, upper: sp.uniform.logcdf(value, lower, upper - lower),
+        )
 
     def test_triangular(self):
         self.pymc3_matches_scipy(
-            Triangular, Runif, {'lower': -Rplusunif, 'c': Runif, 'upper': Rplusunif},
-            lambda value, c, lower, upper: sp.triang.logpdf(value, c-lower, lower, upper-lower))
-        self.check_logcdf(Triangular, Runif, {'lower': -Rplusunif, 'c': Runif, 'upper': Rplusunif},
-                          lambda value, c, lower, upper: sp.triang.logcdf(value, c-lower, lower, upper-lower))
+            Triangular,
+            Runif,
+            {"lower": -Rplusunif, "c": Runif, "upper": Rplusunif},
+            lambda value, c, lower, upper: sp.triang.logpdf(
+                value, c - lower, lower, upper - lower
+            ),
+        )
+        self.check_logcdf(
+            Triangular,
+            Runif,
+            {"lower": -Rplusunif, "c": Runif, "upper": Rplusunif},
+            lambda value, c, lower, upper: sp.triang.logcdf(
+                value, c - lower, lower, upper - lower
+            ),
+        )
 
     def test_bound_normal(self):
-        PositiveNormal = Bound(Normal, lower=0.)
-        self.pymc3_matches_scipy(PositiveNormal, Rplus, {'mu': Rplus, 'sd': Rplus},
-                                 lambda value, mu, sd: sp.norm.logpdf(value, mu, sd),
-                                 decimal=select_by_precision(float64=6, float32=-1))
-        with Model(): x = PositiveNormal('x', mu=0, sd=1, transform=None)
-        assert np.isinf(x.logp({'x':-1}))
+        PositiveNormal = Bound(Normal, lower=0.0)
+        self.pymc3_matches_scipy(
+            PositiveNormal,
+            Rplus,
+            {"mu": Rplus, "sd": Rplus},
+            lambda value, mu, sd: sp.norm.logpdf(value, mu, sd),
+            decimal=select_by_precision(float64=6, float32=-1),
+        )
+        with Model():
+            x = PositiveNormal("x", mu=0, sd=1, transform=None)
+        assert np.isinf(x.logp({"x": -1}))
 
     def test_discrete_unif(self):
         self.pymc3_matches_scipy(
-            DiscreteUniform, Rdunif, {'lower': -Rplusdunif, 'upper': Rplusdunif},
-            lambda value, lower, upper: sp.randint.logpmf(value, lower, upper + 1))
+            DiscreteUniform,
+            Rdunif,
+            {"lower": -Rplusdunif, "upper": Rplusdunif},
+            lambda value, lower, upper: sp.randint.logpmf(value, lower, upper + 1),
+        )
 
     def test_flat(self):
         self.pymc3_matches_scipy(Flat, Runif, {}, lambda value: 0)
         with Model():
-            x = Flat('a')
+            x = Flat("a")
             assert_allclose(x.tag.test_value, 0)
         self.check_logcdf(Flat, Runif, {}, lambda value: np.log(0.5))
         # Check infinite cases individually.
-        assert 0. == Flat.dist().logcdf(np.inf).tag.test_value
+        assert 0.0 == Flat.dist().logcdf(np.inf).tag.test_value
         assert -np.inf == Flat.dist().logcdf(-np.inf).tag.test_value
 
     def test_half_flat(self):
         self.pymc3_matches_scipy(HalfFlat, Rplus, {}, lambda value: 0)
         with Model():
-            x = HalfFlat('a', shape=2)
+            x = HalfFlat("a", shape=2)
             assert_allclose(x.tag.test_value, 1)
             assert x.tag.test_value.shape == (2,)
         self.check_logcdf(HalfFlat, Runif, {}, lambda value: -np.inf)
         # Check infinite cases individually.
-        assert 0. == HalfFlat.dist().logcdf(np.inf).tag.test_value
+        assert 0.0 == HalfFlat.dist().logcdf(np.inf).tag.test_value
         assert -np.inf == HalfFlat.dist().logcdf(-np.inf).tag.test_value
 
     def test_normal(self):
-        self.pymc3_matches_scipy(Normal, R, {'mu': R, 'sd': Rplus},
-                                 lambda value, mu, sd: sp.norm.logpdf(value, mu, sd),
-                                 decimal=select_by_precision(float64=6, float32=1)
-                                 )
-        self.check_logcdf(Normal, R, {'mu': R, 'sd': Rplus},
-                          lambda value, mu, sd: sp.norm.logcdf(value, mu, sd))
+        self.pymc3_matches_scipy(
+            Normal,
+            R,
+            {"mu": R, "sd": Rplus},
+            lambda value, mu, sd: sp.norm.logpdf(value, mu, sd),
+            decimal=select_by_precision(float64=6, float32=1),
+        )
+        self.check_logcdf(
+            Normal,
+            R,
+            {"mu": R, "sd": Rplus},
+            lambda value, mu, sd: sp.norm.logcdf(value, mu, sd),
+        )
 
     def test_truncated_normal(self):
         def scipy_logp(value, mu, sd, lower, upper):
             return sp.truncnorm.logpdf(
-                value, (lower-mu)/sd, (upper-mu)/sd, loc=mu, scale=sd)
-
-        args = {'mu': array(-2.1), 'lower': array(-100.), 'upper': array(0.01),
-                'sd': array(0.01)}
-        val = TruncatedNormal.dist(**args).logp(0.)
+                value, (lower - mu) / sd, (upper - mu) / sd, loc=mu, scale=sd
+            )
+
+        args = {
+            "mu": array(-2.1),
+            "lower": array(-100.0),
+            "upper": array(0.01),
+            "sd": array(0.01),
+        }
+        val = TruncatedNormal.dist(**args).logp(0.0)
         assert_allclose(val.eval(), scipy_logp(value=0, **args))
 
         self.pymc3_matches_scipy(
-            TruncatedNormal, R,
-            {'mu': R, 'sd': Rplusbig, 'lower': -Rplusbig, 'upper': Rplusbig},
+            TruncatedNormal,
+            R,
+            {"mu": R, "sd": Rplusbig, "lower": -Rplusbig, "upper": Rplusbig},
             scipy_logp,
-            decimal=select_by_precision(float64=6, float32=1)
+            decimal=select_by_precision(float64=6, float32=1),
         )
 
     def test_half_normal(self):
-        self.pymc3_matches_scipy(HalfNormal, Rplus, {'sd': Rplus},
-                                 lambda value, sd: sp.halfnorm.logpdf(value, scale=sd),
-                                 decimal=select_by_precision(float64=6, float32=-1)
-                                 )
-        self.check_logcdf(HalfNormal, Rplus, {'sd': Rplus},
-                          lambda value, sd: sp.halfnorm.logcdf(value, scale=sd))
+        self.pymc3_matches_scipy(
+            HalfNormal,
+            Rplus,
+            {"sd": Rplus},
+            lambda value, sd: sp.halfnorm.logpdf(value, scale=sd),
+            decimal=select_by_precision(float64=6, float32=-1),
+        )
+        self.check_logcdf(
+            HalfNormal,
+            Rplus,
+            {"sd": Rplus},
+            lambda value, sd: sp.halfnorm.logcdf(value, scale=sd),
+        )
 
     def test_chi_squared(self):
-        self.pymc3_matches_scipy(ChiSquared, Rplus, {'nu': Rplusdunif},
-                                 lambda value, nu: sp.chi2.logpdf(value, df=nu))
+        self.pymc3_matches_scipy(
+            ChiSquared,
+            Rplus,
+            {"nu": Rplusdunif},
+            lambda value, nu: sp.chi2.logpdf(value, df=nu),
+        )
 
     @pytest.mark.xfail(reason="Poor CDF in SciPy. See scipy/scipy#869 for details.")
     def test_wald_scipy(self):
-        self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus},
-                                 lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha),
-                                 decimal=select_by_precision(float64=6, float32=1)
-                                 )
-        self.check_logcdf(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus},
-                          lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha))
-
-    @pytest.mark.parametrize('value,mu,lam,phi,alpha,logp', [
-        (.5, .001, .5, None, 0., -124500.7257914),
-        (1., .5, .001, None, 0., -4.3733162),
-        (2., 1., None, None, 0., -2.2086593),
-        (5., 2., 2.5, None, 0., -3.4374500),
-        (7.5, 5., None, 1., 0., -3.2199074),
-        (15., 10., None, .75, 0., -4.0360623),
-        (50., 15., None, .66666, 0., -6.1801249),
-        (.5, .001, 0.5, None, 0., -124500.7257914),
-        (1., .5, .001, None, .5, -3.3330954),
-        (2., 1., None, None, 1., -0.9189385),
-        (5., 2., 2.5, None, 2., -2.2128783),
-        (7.5, 5., None, 1., 2.5, -2.5283764),
-        (15., 10., None, .75, 5., -3.3653647),
-        (50., 15., None, .666666, 10., -5.6481874)
-    ])
+        self.pymc3_matches_scipy(
+            Wald,
+            Rplus,
+            {"mu": Rplus, "alpha": Rplus},
+            lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha),
+            decimal=select_by_precision(float64=6, float32=1),
+        )
+        self.check_logcdf(
+            Wald,
+            Rplus,
+            {"mu": Rplus, "alpha": Rplus},
+            lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha),
+        )
+
+    @pytest.mark.parametrize(
+        "value,mu,lam,phi,alpha,logp",
+        [
+            (0.5, 0.001, 0.5, None, 0.0, -124500.7257914),
+            (1.0, 0.5, 0.001, None, 0.0, -4.3733162),
+            (2.0, 1.0, None, None, 0.0, -2.2086593),
+            (5.0, 2.0, 2.5, None, 0.0, -3.4374500),
+            (7.5, 5.0, None, 1.0, 0.0, -3.2199074),
+            (15.0, 10.0, None, 0.75, 0.0, -4.0360623),
+            (50.0, 15.0, None, 0.66666, 0.0, -6.1801249),
+            (0.5, 0.001, 0.5, None, 0.0, -124500.7257914),
+            (1.0, 0.5, 0.001, None, 0.5, -3.3330954),
+            (2.0, 1.0, None, None, 1.0, -0.9189385),
+            (5.0, 2.0, 2.5, None, 2.0, -2.2128783),
+            (7.5, 5.0, None, 1.0, 2.5, -2.5283764),
+            (15.0, 10.0, None, 0.75, 5.0, -3.3653647),
+            (50.0, 15.0, None, 0.666666, 10.0, -5.6481874),
+        ],
+    )
     def test_wald(self, value, mu, lam, phi, alpha, logp):
         # Log probabilities calculated using the dIG function from the R package gamlss.
         # See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or
         # http://www.gamlss.org/.
         with Model() as model:
-            Wald('wald', mu=mu, lam=lam, phi=phi, alpha=alpha, transform=None)
-        pt = {'wald': value}
+            Wald("wald", mu=mu, lam=lam, phi=phi, alpha=alpha, transform=None)
+        pt = {"wald": value}
         decimals = select_by_precision(float64=6, float32=1)
         assert_almost_equal(model.fastlogp(pt), logp, decimal=decimals, err_msg=str(pt))
 
     def test_beta(self):
-        self.pymc3_matches_scipy(Beta, Unit, {'alpha': Rplus, 'beta': Rplus},
-                                 lambda value, alpha, beta: sp.beta.logpdf(value, alpha, beta))
-        self.pymc3_matches_scipy(Beta, Unit, {'mu': Unit, 'sd': Rplus}, beta_mu_sd)
-        self.check_logcdf(Beta, Unit, {'alpha': Rplus, 'beta': Rplus},
-                                lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta))
+        self.pymc3_matches_scipy(
+            Beta,
+            Unit,
+            {"alpha": Rplus, "beta": Rplus},
+            lambda value, alpha, beta: sp.beta.logpdf(value, alpha, beta),
+        )
+        self.pymc3_matches_scipy(Beta, Unit, {"mu": Unit, "sd": Rplus}, beta_mu_sd)
+        self.check_logcdf(
+            Beta,
+            Unit,
+            {"alpha": Rplus, "beta": Rplus},
+            lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta),
+        )
 
     def test_kumaraswamy(self):
         # Scipy does not have a built-in Kumaraswamy pdf
         def scipy_log_pdf(value, a, b):
-            return np.log(a) + np.log(b) + (a - 1) * np.log(value) + (b - 1) * np.log(1 - value ** a)
-        self.pymc3_matches_scipy(Kumaraswamy, Unit, {'a': Rplus, 'b': Rplus}, scipy_log_pdf)
+            return (
+                np.log(a)
+                + np.log(b)
+                + (a - 1) * np.log(value)
+                + (b - 1) * np.log(1 - value ** a)
+            )
+
+        self.pymc3_matches_scipy(
+            Kumaraswamy, Unit, {"a": Rplus, "b": Rplus}, scipy_log_pdf
+        )
 
     def test_exponential(self):
-        self.pymc3_matches_scipy(Exponential, Rplus, {'lam': Rplus},
-                                 lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam))
-        self.check_logcdf(Exponential, Rplus, {'lam': Rplus},
-                          lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam))
+        self.pymc3_matches_scipy(
+            Exponential,
+            Rplus,
+            {"lam": Rplus},
+            lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam),
+        )
+        self.check_logcdf(
+            Exponential,
+            Rplus,
+            {"lam": Rplus},
+            lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam),
+        )
 
     def test_geometric(self):
-        self.pymc3_matches_scipy(Geometric, Nat, {'p': Unit},
-                                 lambda value, p: np.log(sp.geom.pmf(value, p)))
+        self.pymc3_matches_scipy(
+            Geometric, Nat, {"p": Unit}, lambda value, p: np.log(sp.geom.pmf(value, p))
+        )
 
     def test_negative_binomial(self):
         def test_fun(value, mu, alpha):
             return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha))
-        self.pymc3_matches_scipy(NegativeBinomial, Nat, {
-                            'mu': Rplus, 'alpha': Rplus}, test_fun)
+
+        self.pymc3_matches_scipy(
+            NegativeBinomial, Nat, {"mu": Rplus, "alpha": Rplus}, test_fun
+        )
 
     def test_laplace(self):
-        self.pymc3_matches_scipy(Laplace, R, {'mu': R, 'b': Rplus},
-                                 lambda value, mu, b: sp.laplace.logpdf(value, mu, b))
-        self.check_logcdf(Laplace, R, {'mu': R, 'b': Rplus},
-                          lambda value, mu, b: sp.laplace.logcdf(value, mu, b))
+        self.pymc3_matches_scipy(
+            Laplace,
+            R,
+            {"mu": R, "b": Rplus},
+            lambda value, mu, b: sp.laplace.logpdf(value, mu, b),
+        )
+        self.check_logcdf(
+            Laplace,
+            R,
+            {"mu": R, "b": Rplus},
+            lambda value, mu, b: sp.laplace.logcdf(value, mu, b),
+        )
 
     def test_lognormal(self):
         self.pymc3_matches_scipy(
-            Lognormal, Rplus, {'mu': R, 'tau': Rplusbig},
-            lambda value, mu, tau: floatX(sp.lognorm.logpdf(value, tau**-.5, 0, np.exp(mu))))
-        self.check_logcdf(Lognormal, Rplus, {'mu': R, 'tau': Rplusbig},
-                          lambda value, mu, tau: sp.lognorm.logcdf(value, tau**-.5, 0, np.exp(mu)))
+            Lognormal,
+            Rplus,
+            {"mu": R, "tau": Rplusbig},
+            lambda value, mu, tau: floatX(
+                sp.lognorm.logpdf(value, tau ** -0.5, 0, np.exp(mu))
+            ),
+        )
+        self.check_logcdf(
+            Lognormal,
+            Rplus,
+            {"mu": R, "tau": Rplusbig},
+            lambda value, mu, tau: sp.lognorm.logcdf(value, tau ** -0.5, 0, np.exp(mu)),
+        )
 
     def test_t(self):
-        self.pymc3_matches_scipy(StudentT, R, {'nu': Rplus, 'mu': R, 'lam': Rplus},
-                                 lambda value, nu, mu, lam: sp.t.logpdf(value, nu, mu, lam**-0.5))
-        self.check_logcdf(StudentT, R, {'nu': Rplus, 'mu': R, 'lam': Rplus},
-                          lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam**-0.5))
+        self.pymc3_matches_scipy(
+            StudentT,
+            R,
+            {"nu": Rplus, "mu": R, "lam": Rplus},
+            lambda value, nu, mu, lam: sp.t.logpdf(value, nu, mu, lam ** -0.5),
+        )
+        self.check_logcdf(
+            StudentT,
+            R,
+            {"nu": Rplus, "mu": R, "lam": Rplus},
+            lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam ** -0.5),
+        )
 
     def test_cauchy(self):
-        self.pymc3_matches_scipy(Cauchy, R, {'alpha': R, 'beta': Rplusbig},
-                                 lambda value, alpha, beta: sp.cauchy.logpdf(value, alpha, beta))
-        self.check_logcdf(Cauchy, R, {'alpha': R, 'beta': Rplusbig},
-                          lambda value, alpha, beta: sp.cauchy.logcdf(value, alpha, beta))
+        self.pymc3_matches_scipy(
+            Cauchy,
+            R,
+            {"alpha": R, "beta": Rplusbig},
+            lambda value, alpha, beta: sp.cauchy.logpdf(value, alpha, beta),
+        )
+        self.check_logcdf(
+            Cauchy,
+            R,
+            {"alpha": R, "beta": Rplusbig},
+            lambda value, alpha, beta: sp.cauchy.logcdf(value, alpha, beta),
+        )
 
     def test_half_cauchy(self):
-        self.pymc3_matches_scipy(HalfCauchy, Rplus, {'beta': Rplusbig},
-                                 lambda value, beta: sp.halfcauchy.logpdf(value, scale=beta))
-        self.check_logcdf(HalfCauchy, Rplus, {'beta': Rplusbig},
-                          lambda value, beta: sp.halfcauchy.logcdf(value, scale=beta))
+        self.pymc3_matches_scipy(
+            HalfCauchy,
+            Rplus,
+            {"beta": Rplusbig},
+            lambda value, beta: sp.halfcauchy.logpdf(value, scale=beta),
+        )
+        self.check_logcdf(
+            HalfCauchy,
+            Rplus,
+            {"beta": Rplusbig},
+            lambda value, beta: sp.halfcauchy.logcdf(value, scale=beta),
+        )
 
     def test_gamma(self):
         self.pymc3_matches_scipy(
-            Gamma, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig},
-            lambda value, alpha, beta: sp.gamma.logpdf(value, alpha, scale=1.0 / beta))
+            Gamma,
+            Rplus,
+            {"alpha": Rplusbig, "beta": Rplusbig},
+            lambda value, alpha, beta: sp.gamma.logpdf(value, alpha, scale=1.0 / beta),
+        )
 
         def test_fun(value, mu, sd):
-            return sp.gamma.logpdf(value, mu**2 / sd**2, scale=1.0 / (mu / sd**2))
+            return sp.gamma.logpdf(value, mu ** 2 / sd ** 2, scale=1.0 / (mu / sd ** 2))
+
         self.pymc3_matches_scipy(
-            Gamma, Rplus, {'mu': Rplusbig, 'sd': Rplusbig}, test_fun)
+            Gamma, Rplus, {"mu": Rplusbig, "sd": Rplusbig}, test_fun
+        )
 
     def test_inverse_gamma(self):
         self.pymc3_matches_scipy(
-            InverseGamma, Rplus, {'alpha': Rplus, 'beta': Rplus},
-            lambda value, alpha, beta: sp.invgamma.logpdf(value, alpha, scale=beta))
+            InverseGamma,
+            Rplus,
+            {"alpha": Rplus, "beta": Rplus},
+            lambda value, alpha, beta: sp.invgamma.logpdf(value, alpha, scale=beta),
+        )
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"),
-                           reason="Fails on float32 due to scaling issues")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"),
+        reason="Fails on float32 due to scaling issues",
+    )
     def test_inverse_gamma_alt_params(self):
         def test_fun(value, mu, sd):
             alpha, beta = InverseGamma._get_alpha_beta(None, None, mu, sd)
             return sp.invgamma.logpdf(value, alpha, scale=beta)
+
         self.pymc3_matches_scipy(
-            InverseGamma, Rplus, {'mu': Rplus, 'sd': Rplus}, test_fun)
+            InverseGamma, Rplus, {"mu": Rplus, "sd": Rplus}, test_fun
+        )
 
     def test_pareto(self):
-        self.pymc3_matches_scipy(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig},
-                                 lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m))
-        self.check_logcdf(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig},
-                          lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m))
+        self.pymc3_matches_scipy(
+            Pareto,
+            Rplus,
+            {"alpha": Rplusbig, "m": Rplusbig},
+            lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m),
+        )
+        self.check_logcdf(
+            Pareto,
+            Rplus,
+            {"alpha": Rplusbig, "m": Rplusbig},
+            lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m),
+        )
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"),
+        reason="Fails on float32 due to inf issues",
+    )
     def test_weibull(self):
-        self.pymc3_matches_scipy(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig},
-                                 lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta),
-                                 )
-        self.check_logcdf(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig},
-                          lambda value, alpha, beta:
-                          sp.exponweib.logcdf(value, 1, alpha, scale=beta),)
+        self.pymc3_matches_scipy(
+            Weibull,
+            Rplus,
+            {"alpha": Rplusbig, "beta": Rplusbig},
+            lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta),
+        )
+        self.check_logcdf(
+            Weibull,
+            Rplus,
+            {"alpha": Rplusbig, "beta": Rplusbig},
+            lambda value, alpha, beta: sp.exponweib.logcdf(value, 1, alpha, scale=beta),
+        )
 
     def test_half_studentt(self):
         # this is only testing for nu=1 (halfcauchy)
-        self.pymc3_matches_scipy(HalfStudentT, Rplus, {'sd': Rplus},
-                                 lambda value, sd: sp.halfcauchy.logpdf(value, 0, sd))
+        self.pymc3_matches_scipy(
+            HalfStudentT,
+            Rplus,
+            {"sd": Rplus},
+            lambda value, sd: sp.halfcauchy.logpdf(value, 0, sd),
+        )
 
     def test_skew_normal(self):
-        self.pymc3_matches_scipy(SkewNormal, R, {'mu': R, 'sd': Rplusbig, 'alpha': R},
-                                 lambda value, alpha, mu, sd: sp.skewnorm.logpdf(value, alpha, mu, sd))
+        self.pymc3_matches_scipy(
+            SkewNormal,
+            R,
+            {"mu": R, "sd": Rplusbig, "alpha": R},
+            lambda value, alpha, mu, sd: sp.skewnorm.logpdf(value, alpha, mu, sd),
+        )
 
     def test_binomial(self):
-        self.pymc3_matches_scipy(Binomial, Nat, {'n': NatSmall, 'p': Unit},
-                                 lambda value, n, p: sp.binom.logpmf(value, n, p))
+        self.pymc3_matches_scipy(
+            Binomial,
+            Nat,
+            {"n": NatSmall, "p": Unit},
+            lambda value, n, p: sp.binom.logpmf(value, n, p),
+        )
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_beta_binomial(self):
-        self.checkd(BetaBinomial, Nat, {'alpha': Rplus, 'beta': Rplus, 'n': NatSmall})
+        self.checkd(BetaBinomial, Nat, {"alpha": Rplus, "beta": Rplus, "n": NatSmall})
 
     def test_bernoulli(self):
         self.pymc3_matches_scipy(
-            Bernoulli, Bool, {'logit_p': R},
-            lambda value, logit_p: sp.bernoulli.logpmf(value, scipy.special.expit(logit_p)))
-        self.pymc3_matches_scipy(Bernoulli, Bool, {'p': Unit},
-                                 lambda value, p: sp.bernoulli.logpmf(value, p))
-
+            Bernoulli,
+            Bool,
+            {"logit_p": R},
+            lambda value, logit_p: sp.bernoulli.logpmf(
+                value, scipy.special.expit(logit_p)
+            ),
+        )
+        self.pymc3_matches_scipy(
+            Bernoulli, Bool, {"p": Unit}, lambda value, p: sp.bernoulli.logpmf(value, p)
+        )
 
     def test_discrete_weibull(self):
-        self.pymc3_matches_scipy(DiscreteWeibull, Nat,
-                {'q': Unit, 'beta': Rplusdunif}, discrete_weibull_logpmf)
+        self.pymc3_matches_scipy(
+            DiscreteWeibull,
+            Nat,
+            {"q": Unit, "beta": Rplusdunif},
+            discrete_weibull_logpmf,
+        )
 
     def test_poisson(self):
-        self.pymc3_matches_scipy(Poisson, Nat, {'mu': Rplus},
-                                 lambda value, mu: sp.poisson.logpmf(value, mu))
+        self.pymc3_matches_scipy(
+            Poisson, Nat, {"mu": Rplus}, lambda value, mu: sp.poisson.logpmf(value, mu)
+        )
 
     def test_bound_poisson(self):
-        NonZeroPoisson = Bound(Poisson, lower=1.)
-        self.pymc3_matches_scipy(NonZeroPoisson, PosNat, {'mu': Rplus},
-                                lambda value, mu: sp.poisson.logpmf(value, mu))
+        NonZeroPoisson = Bound(Poisson, lower=1.0)
+        self.pymc3_matches_scipy(
+            NonZeroPoisson,
+            PosNat,
+            {"mu": Rplus},
+            lambda value, mu: sp.poisson.logpmf(value, mu),
+        )
 
-        with Model(): x = NonZeroPoisson('x', mu=4)
-        assert np.isinf(x.logp({'x':0}))
+        with Model():
+            x = NonZeroPoisson("x", mu=4)
+        assert np.isinf(x.logp({"x": 0}))
 
     def test_constantdist(self):
-        self.pymc3_matches_scipy(Constant, I, {'c': I},
-                                 lambda value, c: np.log(c == value))
+        self.pymc3_matches_scipy(
+            Constant, I, {"c": I}, lambda value, c: np.log(c == value)
+        )
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_zeroinflatedpoisson(self):
-        self.checkd(ZeroInflatedPoisson, Nat, {'theta': Rplus, 'psi': Unit})
+        self.checkd(ZeroInflatedPoisson, Nat, {"theta": Rplus, "psi": Unit})
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_zeroinflatednegativebinomial(self):
-        self.checkd(ZeroInflatedNegativeBinomial, Nat,
-                    {'mu': Rplusbig, 'alpha': Rplusbig, 'psi': Unit})
+        self.checkd(
+            ZeroInflatedNegativeBinomial,
+            Nat,
+            {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit},
+        )
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_zeroinflatedbinomial(self):
-        self.checkd(ZeroInflatedBinomial, Nat,
-                    {'n': NatSmall, 'p': Unit, 'psi': Unit})
+        self.checkd(ZeroInflatedBinomial, Nat, {"n": NatSmall, "p": Unit, "psi": Unit})
 
-    @pytest.mark.parametrize('n', [1, 2, 3])
+    @pytest.mark.parametrize("n", [1, 2, 3])
     def test_mvnormal(self, n):
-        self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n),
-                                 {'mu': Vector(R, n), 'tau': PdMatrix(n)},
-                                 normal_logpdf_tau)
-        self.pymc3_matches_scipy(MvNormal, Vector(R, n),
-                                 {'mu': Vector(R, n), 'tau': PdMatrix(n)},
-                                 normal_logpdf_tau)
-        self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n),
-                                 {'mu': Vector(R, n), 'cov': PdMatrix(n)},
-                                 normal_logpdf_cov)
-        self.pymc3_matches_scipy(MvNormal, Vector(R, n),
-                                 {'mu': Vector(R, n), 'cov': PdMatrix(n)},
-                                 normal_logpdf_cov)
-        self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n),
-                                 {'mu': Vector(R, n), 'chol': PdMatrixChol(n)},
-                                 normal_logpdf_chol,
-                                 decimal=select_by_precision(float64=6, float32=-1))
-        self.pymc3_matches_scipy(MvNormal, Vector(R, n),
-                                 {'mu': Vector(R, n), 'chol': PdMatrixChol(n)},
-                                 normal_logpdf_chol,
-                                 decimal=select_by_precision(float64=6, float32=0))
+        self.pymc3_matches_scipy(
+            MvNormal,
+            RealMatrix(5, n),
+            {"mu": Vector(R, n), "tau": PdMatrix(n)},
+            normal_logpdf_tau,
+        )
+        self.pymc3_matches_scipy(
+            MvNormal,
+            Vector(R, n),
+            {"mu": Vector(R, n), "tau": PdMatrix(n)},
+            normal_logpdf_tau,
+        )
+        self.pymc3_matches_scipy(
+            MvNormal,
+            RealMatrix(5, n),
+            {"mu": Vector(R, n), "cov": PdMatrix(n)},
+            normal_logpdf_cov,
+        )
+        self.pymc3_matches_scipy(
+            MvNormal,
+            Vector(R, n),
+            {"mu": Vector(R, n), "cov": PdMatrix(n)},
+            normal_logpdf_cov,
+        )
+        self.pymc3_matches_scipy(
+            MvNormal,
+            RealMatrix(5, n),
+            {"mu": Vector(R, n), "chol": PdMatrixChol(n)},
+            normal_logpdf_chol,
+            decimal=select_by_precision(float64=6, float32=-1),
+        )
+        self.pymc3_matches_scipy(
+            MvNormal,
+            Vector(R, n),
+            {"mu": Vector(R, n), "chol": PdMatrixChol(n)},
+            normal_logpdf_chol,
+            decimal=select_by_precision(float64=6, float32=0),
+        )
 
         def MvNormalUpper(*args, **kwargs):
             return MvNormal(lower=False, *args, **kwargs)
 
-        self.pymc3_matches_scipy(MvNormalUpper, Vector(R, n),
-                                 {'mu': Vector(R, n), 'chol': PdMatrixCholUpper(n)},
-                                 normal_logpdf_chol_upper,
-                                 decimal=select_by_precision(float64=6, float32=0))
+        self.pymc3_matches_scipy(
+            MvNormalUpper,
+            Vector(R, n),
+            {"mu": Vector(R, n), "chol": PdMatrixCholUpper(n)},
+            normal_logpdf_chol_upper,
+            decimal=select_by_precision(float64=6, float32=0),
+        )
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"),
+        reason="Fails on float32 due to inf issues",
+    )
     def test_mvnormal_indef(self):
         cov_val = np.array([[1, 0.5], [0.5, -2]])
-        cov = tt.matrix('cov')
+        cov = tt.matrix("cov")
         cov.tag.test_value = np.eye(2)
         mu = floatX(np.zeros(2))
-        x = tt.vector('x')
+        x = tt.vector("x")
         x.tag.test_value = np.zeros(2)
         logp = MvNormal.dist(mu=mu, cov=cov).logp(x)
         f_logp = theano.function([cov, x], logp)
@@ -845,96 +1157,153 @@ def test_mvnormal_indef(self):
     def test_mvnormal_init_fail(self):
         with Model():
             with pytest.raises(ValueError):
-                x = MvNormal('x', mu=np.zeros(3), shape=3)
+                x = MvNormal("x", mu=np.zeros(3), shape=3)
             with pytest.raises(ValueError):
-                x = MvNormal('x', mu=np.zeros(3), cov=np.eye(3), tau=np.eye(3), shape=3)
+                x = MvNormal("x", mu=np.zeros(3), cov=np.eye(3), tau=np.eye(3), shape=3)
 
-    @pytest.mark.parametrize('n', [1, 2, 3])
+    @pytest.mark.parametrize("n", [1, 2, 3])
     def test_matrixnormal(self, n):
         mat_scale = 1e3  # To reduce logp magnitude
-        mean_scale = .1
-        self.pymc3_matches_scipy(MatrixNormal, RealMatrix(n, n),
-                                 {'mu': RealMatrix(n, n)*mean_scale,
-                                  'rowcov': PdMatrix(n)*mat_scale,
-                                  'colcov': PdMatrix(n)*mat_scale},
-                                 matrix_normal_logpdf_cov)
-        self.pymc3_matches_scipy(MatrixNormal, RealMatrix(2, n),
-                                 {'mu': RealMatrix(2, n)*mean_scale,
-                                  'rowcov': PdMatrix(2)*mat_scale,
-                                  'colcov': PdMatrix(n)*mat_scale},
-                                 matrix_normal_logpdf_cov)
-        self.pymc3_matches_scipy(MatrixNormal, RealMatrix(3, n),
-                                 {'mu': RealMatrix(3, n)*mean_scale,
-                                  'rowchol': PdMatrixChol(3)*mat_scale,
-                                  'colchol': PdMatrixChol(n)*mat_scale},
-                                 matrix_normal_logpdf_chol,
-                                 decimal=select_by_precision(float64=6, float32=-1))
-        self.pymc3_matches_scipy(MatrixNormal, RealMatrix(n, 3),
-                                 {'mu': RealMatrix(n, 3)*mean_scale,
-                                  'rowchol': PdMatrixChol(n)*mat_scale,
-                                  'colchol': PdMatrixChol(3)*mat_scale},
-                                 matrix_normal_logpdf_chol,
-                                 decimal=select_by_precision(float64=6, float32=0))
-
-    @pytest.mark.parametrize('n', [2, 3])
-    @pytest.mark.parametrize('m', [3])
-    @pytest.mark.parametrize('sigma', [None, 1.0])
+        mean_scale = 0.1
+        self.pymc3_matches_scipy(
+            MatrixNormal,
+            RealMatrix(n, n),
+            {
+                "mu": RealMatrix(n, n) * mean_scale,
+                "rowcov": PdMatrix(n) * mat_scale,
+                "colcov": PdMatrix(n) * mat_scale,
+            },
+            matrix_normal_logpdf_cov,
+        )
+        self.pymc3_matches_scipy(
+            MatrixNormal,
+            RealMatrix(2, n),
+            {
+                "mu": RealMatrix(2, n) * mean_scale,
+                "rowcov": PdMatrix(2) * mat_scale,
+                "colcov": PdMatrix(n) * mat_scale,
+            },
+            matrix_normal_logpdf_cov,
+        )
+        self.pymc3_matches_scipy(
+            MatrixNormal,
+            RealMatrix(3, n),
+            {
+                "mu": RealMatrix(3, n) * mean_scale,
+                "rowchol": PdMatrixChol(3) * mat_scale,
+                "colchol": PdMatrixChol(n) * mat_scale,
+            },
+            matrix_normal_logpdf_chol,
+            decimal=select_by_precision(float64=6, float32=-1),
+        )
+        self.pymc3_matches_scipy(
+            MatrixNormal,
+            RealMatrix(n, 3),
+            {
+                "mu": RealMatrix(n, 3) * mean_scale,
+                "rowchol": PdMatrixChol(n) * mat_scale,
+                "colchol": PdMatrixChol(3) * mat_scale,
+            },
+            matrix_normal_logpdf_chol,
+            decimal=select_by_precision(float64=6, float32=0),
+        )
+
+    @pytest.mark.parametrize("n", [2, 3])
+    @pytest.mark.parametrize("m", [3])
+    @pytest.mark.parametrize("sigma", [None, 1.0])
     def test_kroneckernormal(self, n, m, sigma):
         np.random.seed(5)
-        N = n*m
+        N = n * m
         covs = [RandomPdMatrix(n), RandomPdMatrix(m)]
         chols = list(map(np.linalg.cholesky, covs))
         evds = list(map(np.linalg.eigh, covs))
-        dom = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N)
-        mu = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N)
+        dom = Domain([np.random.randn(N) * 0.1], edges=(None, None), shape=N)
+        mu = Domain([np.random.randn(N) * 0.1], edges=(None, None), shape=N)
 
-        std_args = {'mu': mu}
-        cov_args = {'covs': covs}
-        chol_args = {'chols': chols}
-        evd_args = {'evds': evds}
+        std_args = {"mu": mu}
+        cov_args = {"covs": covs}
+        chol_args = {"chols": chols}
+        evd_args = {"evds": evds}
         if sigma is not None and sigma != 0:
-            std_args['sigma'] = Domain([sigma], edges=(None, None))
+            std_args["sigma"] = Domain([sigma], edges=(None, None))
         else:
             for args in [cov_args, chol_args, evd_args]:
-                args['sigma'] = sigma
+                args["sigma"] = sigma
 
         self.pymc3_matches_scipy(
-             KroneckerNormal, dom, std_args, kron_normal_logpdf_cov,
-             extra_args=cov_args, scipy_args=cov_args)
+            KroneckerNormal,
+            dom,
+            std_args,
+            kron_normal_logpdf_cov,
+            extra_args=cov_args,
+            scipy_args=cov_args,
+        )
         self.pymc3_matches_scipy(
-             KroneckerNormal, dom, std_args, kron_normal_logpdf_chol,
-             extra_args=chol_args, scipy_args=chol_args)
+            KroneckerNormal,
+            dom,
+            std_args,
+            kron_normal_logpdf_chol,
+            extra_args=chol_args,
+            scipy_args=chol_args,
+        )
         self.pymc3_matches_scipy(
-             KroneckerNormal, dom, std_args, kron_normal_logpdf_evd,
-             extra_args=evd_args, scipy_args=evd_args)
+            KroneckerNormal,
+            dom,
+            std_args,
+            kron_normal_logpdf_evd,
+            extra_args=evd_args,
+            scipy_args=evd_args,
+        )
 
-        dom = Domain([np.random.randn(2, N)*0.1], edges=(None, None), shape=(2, N))
+        dom = Domain([np.random.randn(2, N) * 0.1], edges=(None, None), shape=(2, N))
 
         self.pymc3_matches_scipy(
-             KroneckerNormal, dom, std_args, kron_normal_logpdf_cov,
-             extra_args=cov_args, scipy_args=cov_args)
+            KroneckerNormal,
+            dom,
+            std_args,
+            kron_normal_logpdf_cov,
+            extra_args=cov_args,
+            scipy_args=cov_args,
+        )
         self.pymc3_matches_scipy(
-             KroneckerNormal, dom, std_args, kron_normal_logpdf_chol,
-             extra_args=chol_args, scipy_args=chol_args)
+            KroneckerNormal,
+            dom,
+            std_args,
+            kron_normal_logpdf_chol,
+            extra_args=chol_args,
+            scipy_args=chol_args,
+        )
         self.pymc3_matches_scipy(
-             KroneckerNormal, dom, std_args, kron_normal_logpdf_evd,
-             extra_args=evd_args, scipy_args=evd_args)
+            KroneckerNormal,
+            dom,
+            std_args,
+            kron_normal_logpdf_evd,
+            extra_args=evd_args,
+            scipy_args=evd_args,
+        )
 
-    @pytest.mark.parametrize('n', [1, 2])
+    @pytest.mark.parametrize("n", [1, 2])
     def test_mvt(self, n):
-        self.pymc3_matches_scipy(MvStudentT, Vector(R, n),
-                                 {'nu': Rplus, 'Sigma': PdMatrix(n), 'mu': Vector(R, n)},
-                                 mvt_logpdf)
-        self.pymc3_matches_scipy(MvStudentT, RealMatrix(2, n),
-                                 {'nu': Rplus, 'Sigma': PdMatrix(n), 'mu': Vector(R, n)},
-                                 mvt_logpdf)
-
-    @pytest.mark.parametrize('n',[2,3,4])
-    def test_AR1(self, n):
-        self.pymc3_matches_scipy(AR1, Vector(R, n), {'k': Unit, 'tau_e': Rplus}, AR1_logpdf)
+        self.pymc3_matches_scipy(
+            MvStudentT,
+            Vector(R, n),
+            {"nu": Rplus, "Sigma": PdMatrix(n), "mu": Vector(R, n)},
+            mvt_logpdf,
+        )
+        self.pymc3_matches_scipy(
+            MvStudentT,
+            RealMatrix(2, n),
+            {"nu": Rplus, "Sigma": PdMatrix(n), "mu": Vector(R, n)},
+            mvt_logpdf,
+        )
 
+    @pytest.mark.parametrize("n", [2, 3, 4])
+    def test_AR1(self, n):
+        self.pymc3_matches_scipy(
+            AR1, Vector(R, n), {"k": Unit, "tau_e": Rplus}, AR1_logpdf
+        )
 
-    @pytest.mark.parametrize('n', [2, 3])
+    @pytest.mark.parametrize("n", [2, 3])
     def test_wishart(self, n):
         # This check compares the autodiff gradient to the numdiff gradient.
         # However, due to the strict constraints of the wishart,
@@ -945,195 +1314,236 @@ def test_wishart(self, n):
         #             checks=[self.check_dlogp])
         pass
 
-    @pytest.mark.parametrize('x,eta,n,lp', LKJ_CASES)
+    @pytest.mark.parametrize("x,eta,n,lp", LKJ_CASES)
     def test_lkj(self, x, eta, n, lp):
         with Model() as model:
-            LKJCorr('lkj', eta=eta, n=n, transform=None)
+            LKJCorr("lkj", eta=eta, n=n, transform=None)
 
-        pt = {'lkj': x}
+        pt = {"lkj": x}
         decimals = select_by_precision(float64=6, float32=4)
         assert_almost_equal(model.fastlogp(pt), lp, decimal=decimals, err_msg=str(pt))
 
-    @pytest.mark.parametrize('n', [2, 3])
+    @pytest.mark.parametrize("n", [2, 3])
     def test_dirichlet(self, n):
-        self.pymc3_matches_scipy(Dirichlet, Simplex(
-            n), {'a': Vector(Rplus, n)}, dirichlet_logpdf)
+        self.pymc3_matches_scipy(
+            Dirichlet, Simplex(n), {"a": Vector(Rplus, n)}, dirichlet_logpdf
+        )
 
     def test_dirichlet_2D(self):
-        self.pymc3_matches_scipy(Dirichlet, MultiSimplex(2, 2),
-                                 {'a': Vector(Vector(Rplus, 2), 2)}, dirichlet_logpdf)
+        self.pymc3_matches_scipy(
+            Dirichlet,
+            MultiSimplex(2, 2),
+            {"a": Vector(Vector(Rplus, 2), 2)},
+            dirichlet_logpdf,
+        )
 
-    @pytest.mark.parametrize('n', [2, 3])
+    @pytest.mark.parametrize("n", [2, 3])
     def test_multinomial(self, n):
-        self.pymc3_matches_scipy(Multinomial, Vector(Nat, n), {'p': Simplex(n), 'n': Nat},
-                                 multinomial_logpdf)
-
-    @pytest.mark.parametrize('p,n', [
-        [[.25, .25, .25, .25], 1],
-        [[.3, .6, .05, .05], 2],
-        [[.3, .6, .05, .05], 10],
-    ])
+        self.pymc3_matches_scipy(
+            Multinomial, Vector(Nat, n), {"p": Simplex(n), "n": Nat}, multinomial_logpdf
+        )
+
+    @pytest.mark.parametrize(
+        "p,n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 1],
+            [[0.3, 0.6, 0.05, 0.05], 2],
+            [[0.3, 0.6, 0.05, 0.05], 10],
+        ],
+    )
     def test_multinomial_mode(self, p, n):
         _p = np.array(p)
         with Model() as model:
-            m = Multinomial('m', n, _p, _p.shape)
+            m = Multinomial("m", n, _p, _p.shape)
         assert_allclose(m.distribution.mode.eval().sum(), n)
         _p = np.array([p, p])
         with Model() as model:
-            m = Multinomial('m', n, _p, _p.shape)
+            m = Multinomial("m", n, _p, _p.shape)
         assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
 
-    @pytest.mark.parametrize('p, shape, n', [
-        [[.25, .25, .25, .25], 4, 2],
-        [[.25, .25, .25, .25], (1, 4), 3],
-        # 3: expect to fail
-        # [[.25, .25, .25, .25], (10, 4)],
-        [[.25, .25, .25, .25], (10, 1, 4), 5],
-        # 5: expect to fail
-        # [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
-        [[[.25, .25, .25, .25],
-         [.25, .25, .25, .25]], (2, 4), 13],
-        [[[.25, .25, .25, .25],
-         [.25, .25, .25, .25]], (1, 2, 4), [23, 29]],
-        [[[.25, .25, .25, .25],
-         [.25, .25, .25, .25]], (10, 2, 4), [31, 37]],
-        [[[.25, .25, .25, .25],
-         [.25, .25, .25, .25]], (2, 4), [17, 19]],
-    ])
+    @pytest.mark.parametrize(
+        "p, shape, n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 4, 2],
+            [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
+            # 3: expect to fail
+            # [[.25, .25, .25, .25], (10, 4)],
+            [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
+            # 5: expect to fail
+            # [[[.25, .25, .25, .25]], (2, 4), [7, 11]],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
+            [
+                [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
+                (10, 2, 4),
+                [31, 37],
+            ],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
+        ],
+    )
     def test_multinomial_random(self, p, shape, n):
         p = np.asarray(p)
         with Model() as model:
-            m = Multinomial('m', n=n, p=p, shape=shape)
+            m = Multinomial("m", n=n, p=p, shape=shape)
         m.random()
 
     def test_multinomial_mode_with_shape(self):
         n = [1, 10]
-        p = np.asarray([[.25,.25,.25,.25], [.26, .26, .26, .22]])
+        p = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]])
         with Model() as model:
-            m = Multinomial('m', n=n, p=p, shape=(2, 4))
+            m = Multinomial("m", n=n, p=p, shape=(2, 4))
         assert_allclose(m.distribution.mode.eval().sum(axis=-1), n)
 
     def test_multinomial_vec(self):
-        vals = np.array([[2,4,4], [3,3,4]])
+        vals = np.array([[2, 4, 4], [3, 3, 4]])
         p = np.array([0.2, 0.3, 0.5])
         n = 10
 
         with Model() as model_single:
-            Multinomial('m', n=n, p=p, shape=len(p))
+            Multinomial("m", n=n, p=p, shape=len(p))
 
         with Model() as model_many:
-            Multinomial('m', n=n, p=p, shape=vals.shape)
+            Multinomial("m", n=n, p=p, shape=vals.shape)
 
-        assert_almost_equal(scipy.stats.multinomial.logpmf(vals, n, p),
-                            np.asarray([model_single.fastlogp({'m': val}) for val in vals]),
-                            decimal=4)
+        assert_almost_equal(
+            scipy.stats.multinomial.logpmf(vals, n, p),
+            np.asarray([model_single.fastlogp({"m": val}) for val in vals]),
+            decimal=4,
+        )
 
-        assert_almost_equal(scipy.stats.multinomial.logpmf(vals, n, p),
-                            model_many.free_RVs[0].logp_elemwise({'m': vals}).squeeze(),
-                            decimal=4)
+        assert_almost_equal(
+            scipy.stats.multinomial.logpmf(vals, n, p),
+            model_many.free_RVs[0].logp_elemwise({"m": vals}).squeeze(),
+            decimal=4,
+        )
 
-        assert_almost_equal(sum([model_single.fastlogp({'m': val}) for val in vals]),
-                            model_many.fastlogp({'m': vals}),
-                            decimal=4)
+        assert_almost_equal(
+            sum([model_single.fastlogp({"m": val}) for val in vals]),
+            model_many.fastlogp({"m": vals}),
+            decimal=4,
+        )
 
     def test_multinomial_vec_1d_n(self):
-        vals = np.array([[2,4,4], [4,3,4]])
+        vals = np.array([[2, 4, 4], [4, 3, 4]])
         p = np.array([0.2, 0.3, 0.5])
         ns = np.array([10, 11])
 
         with Model() as model:
-            Multinomial('m', n=ns, p=p, shape=vals.shape)
+            Multinomial("m", n=ns, p=p, shape=vals.shape)
 
-        assert_almost_equal(sum([multinomial_logpdf(val, n, p) for val, n in zip(vals, ns)]),
-                            model.fastlogp({'m': vals}),
-                            decimal=4)
+        assert_almost_equal(
+            sum([multinomial_logpdf(val, n, p) for val, n in zip(vals, ns)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
 
     def test_multinomial_vec_1d_n_2d_p(self):
-        vals = np.array([[2,4,4], [4,3,4]])
-        ps = np.array([[0.2, 0.3, 0.5],
-                       [0.9, 0.09, 0.01]])
+        vals = np.array([[2, 4, 4], [4, 3, 4]])
+        ps = np.array([[0.2, 0.3, 0.5], [0.9, 0.09, 0.01]])
         ns = np.array([10, 11])
 
         with Model() as model:
-            Multinomial('m', n=ns, p=ps, shape=vals.shape)
+            Multinomial("m", n=ns, p=ps, shape=vals.shape)
 
-        assert_almost_equal(sum([multinomial_logpdf(val, n, p) for val, n, p in zip(vals, ns, ps)]),
-                            model.fastlogp({'m': vals}),
-                            decimal=4)
+        assert_almost_equal(
+            sum([multinomial_logpdf(val, n, p) for val, n, p in zip(vals, ns, ps)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
 
     def test_multinomial_vec_2d_p(self):
-        vals = np.array([[2,4,4], [3,3,4]])
-        ps = np.array([[0.2, 0.3, 0.5],
-                       [0.3, 0.3, 0.4]])
+        vals = np.array([[2, 4, 4], [3, 3, 4]])
+        ps = np.array([[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]])
         n = 10
 
         with Model() as model:
-            Multinomial('m', n=n, p=ps, shape=vals.shape)
+            Multinomial("m", n=n, p=ps, shape=vals.shape)
 
-        assert_almost_equal(sum([multinomial_logpdf(val, n, p) for val, p in zip(vals, ps)]),
-                            model.fastlogp({'m': vals}),
-                            decimal=4)
+        assert_almost_equal(
+            sum([multinomial_logpdf(val, n, p) for val, p in zip(vals, ps)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
 
     def test_categorical_bounds(self):
         with Model():
-            x = Categorical('x', p=np.array([0.2, 0.3, 0.5]))
-            assert np.isinf(x.logp({'x': -1}))
-            assert np.isinf(x.logp({'x': 3}))
+            x = Categorical("x", p=np.array([0.2, 0.3, 0.5]))
+            assert np.isinf(x.logp({"x": -1}))
+            assert np.isinf(x.logp({"x": 3}))
 
-    @pytest.mark.parametrize('n', [2, 3, 4])
+    @pytest.mark.parametrize("n", [2, 3, 4])
     def test_categorical(self, n):
-        self.pymc3_matches_scipy(Categorical, Domain(range(n), 'int64'), {'p': Simplex(n)},
-                                 lambda value, p: categorical_logpdf(value, p))
+        self.pymc3_matches_scipy(
+            Categorical,
+            Domain(range(n), "int64"),
+            {"p": Simplex(n)},
+            lambda value, p: categorical_logpdf(value, p),
+        )
 
-    @pytest.mark.parametrize('n', [2, 3, 4])
+    @pytest.mark.parametrize("n", [2, 3, 4])
     def test_orderedlogistic(self, n):
-        self.pymc3_matches_scipy(OrderedLogistic, Domain(range(n), 'int64'),
-                                 {'eta': R, 'cutpoints': Vector(R, n-1)},
-                                 lambda value, eta, cutpoints: orderedlogistic_logpdf(value, eta, cutpoints))
+        self.pymc3_matches_scipy(
+            OrderedLogistic,
+            Domain(range(n), "int64"),
+            {"eta": R, "cutpoints": Vector(R, n - 1)},
+            lambda value, eta, cutpoints: orderedlogistic_logpdf(value, eta, cutpoints),
+        )
 
     def test_densitydist(self):
         def logp(x):
-            return -log(2 * .5) - abs(x - .5) / .5
-        self.checkd(DensityDist, R, {}, extra_args={'logp': logp})
+            return -log(2 * 0.5) - abs(x - 0.5) / 0.5
+
+        self.checkd(DensityDist, R, {}, extra_args={"logp": logp})
 
     def test_addpotential(self):
         with Model() as model:
-            value = Normal('value', 1, 1)
-            Potential('value_squared', -value ** 2)
+            value = Normal("value", 1, 1)
+            Potential("value_squared", -value ** 2)
             self.check_dlogp(model, value, R, {})
 
     def test_get_tau_sd(self):
         sd = np.array([2])
-        assert_almost_equal(continuous.get_tau_sd(sd=sd), [1. / sd**2, sd])
-
-    @pytest.mark.parametrize('value,mu,sigma,nu,logp', [
-        (0.5, -50.000, 0.500, 0.500, -99.8068528),
-        (1.0, -1.000, 0.001, 0.001, -1992.5922447),
-        (2.0, 0.001, 1.000, 1.000, -1.6720416),
-        (5.0, 0.500, 2.500, 2.500, -2.4543644),
-        (7.5, 2.000, 5.000, 5.000, -2.8259429),
-        (15.0, 5.000, 7.500, 7.500, -3.3093854),
-        (50.0, 50.000, 10.000, 10.000, -3.6436067),
-        (1000.0, 500.000, 10.000, 20.000, -27.8707323)
-    ])
+        assert_almost_equal(continuous.get_tau_sd(sd=sd), [1.0 / sd ** 2, sd])
+
+    @pytest.mark.parametrize(
+        "value,mu,sigma,nu,logp",
+        [
+            (0.5, -50.000, 0.500, 0.500, -99.8068528),
+            (1.0, -1.000, 0.001, 0.001, -1992.5922447),
+            (2.0, 0.001, 1.000, 1.000, -1.6720416),
+            (5.0, 0.500, 2.500, 2.500, -2.4543644),
+            (7.5, 2.000, 5.000, 5.000, -2.8259429),
+            (15.0, 5.000, 7.500, 7.500, -3.3093854),
+            (50.0, 50.000, 10.000, 10.000, -3.6436067),
+            (1000.0, 500.000, 10.000, 20.000, -27.8707323),
+        ],
+    )
     def test_ex_gaussian(self, value, mu, sigma, nu, logp):
         """Log probabilities calculated using the dexGAUS function from the R package gamlss.
         See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or http://www.gamlss.org/."""
         with Model() as model:
-            ExGaussian('eg', mu=mu, sigma=sigma, nu=nu)
-        pt = {'eg': value}
-        assert_almost_equal(model.fastlogp(pt), logp, decimal=select_by_precision(float64=6, float32=2), err_msg=str(pt))
-
-    @pytest.mark.parametrize('value,mu,sigma,nu,logcdf', [
-        (0.5, -50.000, 0.500, 0.500, 0.0000000),
-        (1.0, -1.000, 0.001, 0.001, 0.0000000),
-        (2.0, 0.001, 1.000, 1.000, -0.2365674),
-        (5.0, 0.500, 2.500, 2.500, -0.2886489),
-        (7.5, 2.000, 5.000, 5.000, -0.5655104),
-        (15.0, 5.000, 7.500, 7.500, -0.4545255),
-        (50.0, 50.000, 10.000, 10.000, -1.433714),
-        (1000.0, 500.000, 10.000, 20.000, -1.573708e-11),
-    ])
+            ExGaussian("eg", mu=mu, sigma=sigma, nu=nu)
+        pt = {"eg": value}
+        assert_almost_equal(
+            model.fastlogp(pt),
+            logp,
+            decimal=select_by_precision(float64=6, float32=2),
+            err_msg=str(pt),
+        )
+
+    @pytest.mark.parametrize(
+        "value,mu,sigma,nu,logcdf",
+        [
+            (0.5, -50.000, 0.500, 0.500, 0.0000000),
+            (1.0, -1.000, 0.001, 0.001, 0.0000000),
+            (2.0, 0.001, 1.000, 1.000, -0.2365674),
+            (5.0, 0.500, 2.500, 2.500, -0.2886489),
+            (7.5, 2.000, 5.000, 5.000, -0.5655104),
+            (15.0, 5.000, 7.500, 7.500, -0.4545255),
+            (50.0, 50.000, 10.000, 10.000, -1.433714),
+            (1000.0, 500.000, 10.000, 20.000, -1.573708e-11),
+        ],
+    )
     def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf):
         """Log probabilities calculated using the pexGAUS function from the R package gamlss.
         See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or http://www.gamlss.org/."""
@@ -1141,68 +1551,94 @@ def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf):
             ExGaussian.dist(mu=mu, sigma=sigma, nu=nu).logcdf(value).tag.test_value,
             logcdf,
             decimal=select_by_precision(float64=6, float32=2),
-            err_msg=str((value, mu, sigma, nu, logcdf)))
+            err_msg=str((value, mu, sigma, nu, logcdf)),
+        )
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_vonmises(self):
         self.pymc3_matches_scipy(
-            VonMises, R, {'mu': Circ, 'kappa': Rplus},
-            lambda value, mu, kappa: floatX(sp.vonmises.logpdf(value, kappa, loc=mu)))
+            VonMises,
+            R,
+            {"mu": Circ, "kappa": Rplus},
+            lambda value, mu, kappa: floatX(sp.vonmises.logpdf(value, kappa, loc=mu)),
+        )
 
     def test_gumbel(self):
         def gumbel(value, mu, beta):
             return floatX(sp.gumbel_r.logpdf(value, loc=mu, scale=beta))
-        self.pymc3_matches_scipy(Gumbel, R, {'mu': R, 'beta': Rplusbig}, gumbel)
+
+        self.pymc3_matches_scipy(Gumbel, R, {"mu": R, "beta": Rplusbig}, gumbel)
 
         def gumbellcdf(value, mu, beta):
             return floatX(sp.gumbel_r.logcdf(value, loc=mu, scale=beta))
-        self.check_logcdf(Gumbel, R, {'mu': R, 'beta': Rplusbig}, gumbellcdf)
+
+        self.check_logcdf(Gumbel, R, {"mu": R, "beta": Rplusbig}, gumbellcdf)
 
     def test_logistic(self):
-        self.pymc3_matches_scipy(Logistic, R, {'mu': R, 's': Rplus},
-                                 lambda value, mu, s: sp.logistic.logpdf(value, mu, s),
-                                 decimal=select_by_precision(float64=6, float32=1))
-        self.check_logcdf(Logistic, R, {'mu': R, 's': Rplus},
-                          lambda value, mu, s: sp.logistic.logcdf(value, mu, s),
-                          decimal=select_by_precision(float64=6, float32=1))
+        self.pymc3_matches_scipy(
+            Logistic,
+            R,
+            {"mu": R, "s": Rplus},
+            lambda value, mu, s: sp.logistic.logpdf(value, mu, s),
+            decimal=select_by_precision(float64=6, float32=1),
+        )
+        self.check_logcdf(
+            Logistic,
+            R,
+            {"mu": R, "s": Rplus},
+            lambda value, mu, s: sp.logistic.logcdf(value, mu, s),
+            decimal=select_by_precision(float64=6, float32=1),
+        )
 
     def test_logitnormal(self):
-        self.pymc3_matches_scipy(LogitNormal, Unit, {'mu': R, 'sd': Rplus},
-                                 lambda value, mu, sd: (sp.norm.logpdf(logit(value), mu, sd)
-                                                        - (np.log(value) + np.log1p(-value))),
-                                 decimal=select_by_precision(float64=6, float32=1))
+        self.pymc3_matches_scipy(
+            LogitNormal,
+            Unit,
+            {"mu": R, "sd": Rplus},
+            lambda value, mu, sd: (
+                sp.norm.logpdf(logit(value), mu, sd)
+                - (np.log(value) + np.log1p(-value))
+            ),
+            decimal=select_by_precision(float64=6, float32=1),
+        )
 
     def test_multidimensional_beta_construction(self):
         with Model():
-            Beta('beta', alpha=1., beta=1., shape=(10, 20))
+            Beta("beta", alpha=1.0, beta=1.0, shape=(10, 20))
 
     def test_rice(self):
-        self.pymc3_matches_scipy(Rice, Rplus, {'nu': Rplus, 'sd': Rplusbig},
-                                 lambda value, nu, sd: sp.rice.logpdf(value, b=nu, loc=0, scale=sd))
+        self.pymc3_matches_scipy(
+            Rice,
+            Rplus,
+            {"nu": Rplus, "sd": Rplusbig},
+            lambda value, nu, sd: sp.rice.logpdf(value, b=nu, loc=0, scale=sd),
+        )
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_interpolated(self):
         for mu in R.vals:
             for sd in Rplus.vals:
-                #pylint: disable=cell-var-from-loop
+                # pylint: disable=cell-var-from-loop
                 xmin = mu - 5 * sd
                 xmax = mu + 5 * sd
 
-                class TestedInterpolated (Interpolated):
+                class TestedInterpolated(Interpolated):
                     def __init__(self, **kwargs):
                         x_points = np.linspace(xmin, xmax, 100000)
                         pdf_points = sp.norm.pdf(x_points, loc=mu, scale=sd)
                         super(TestedInterpolated, self).__init__(
-                            x_points=x_points,
-                            pdf_points=pdf_points,
-                            **kwargs
+                            x_points=x_points, pdf_points=pdf_points, **kwargs
                         )
 
                 def ref_pdf(value):
                     return np.where(
                         np.logical_and(value >= xmin, value <= xmax),
                         sp.norm.logpdf(value, mu, sd),
-                        -np.inf * np.ones(value.shape)
+                        -np.inf * np.ones(value.shape),
                     )
 
                 self.pymc3_matches_scipy(TestedInterpolated, R, {}, ref_pdf)
@@ -1213,7 +1649,7 @@ def test_bound():
     UnboundNormal = Bound(Normal)
     dist = UnboundNormal.dist(mu=0, sd=1)
     assert dist.transform is None
-    assert dist.default() == 0.
+    assert dist.default() == 0.0
     assert isinstance(dist.random(), np.ndarray)
 
     LowerNormal = Bound(Normal, lower=1)
@@ -1237,13 +1673,13 @@ def test_bound():
     assert dist.transform is not None
     with pytest.raises(ValueError) as err:
         dist.random()
-    err.match('Drawing samples from distributions with array-valued')
+    err.match("Drawing samples from distributions with array-valued")
 
     with Model():
-        a = ArrayNormal('c', shape=2)
+        a = ArrayNormal("c", shape=2)
         assert_equal(a.tag.test_value, np.array([1.5, 2.5]))
 
-    lower = tt.vector('lower')
+    lower = tt.vector("lower")
     lower.tag.test_value = np.array([1, 2]).astype(theano.config.floatX)
     upper = 3
     ArrayNormal = Bound(Normal, lower=lower, upper=upper)
@@ -1254,7 +1690,7 @@ def test_bound():
     assert dist.transform is not None
 
     with Model():
-        a = ArrayNormal('c', shape=2)
+        a = ArrayNormal("c", shape=2)
         assert_equal(a.tag.test_value, np.array([2, 2.5]))
 
     rand = Bound(Binomial, lower=10).dist(n=20, p=0.3).random()
@@ -1275,7 +1711,6 @@ def test_bound():
 
 
 class TestLatex(object):
-
     def setup_class(self):
         # True parameter values
         alpha, sigma = 1, 1
@@ -1288,25 +1723,25 @@ def setup_class(self):
         X = np.random.normal(size=(size, 2)).dot(np.array([[1, 0], [0, 0.2]]))
 
         # Simulate outcome variable
-        Y = alpha + X.dot(beta) + np.random.randn(size)*sigma
+        Y = alpha + X.dot(beta) + np.random.randn(size) * sigma
         with Model() as self.model:
             # Priors for unknown model parameters
-            alpha = Normal('alpha', mu=0, sd=10)
-            b = Normal('beta', mu=0, sd=10, shape=(2,), observed=beta)
-            sigma = HalfNormal('sigma', sd=1)
+            alpha = Normal("alpha", mu=0, sd=10)
+            b = Normal("beta", mu=0, sd=10, shape=(2,), observed=beta)
+            sigma = HalfNormal("sigma", sd=1)
 
             # Expected value of outcome
-            mu = Deterministic('mu', alpha + tt.dot(X, b))
+            mu = Deterministic("mu", alpha + tt.dot(X, b))
 
             # Likelihood (sampling distribution) of observations
-            Y_obs = Normal('Y_obs', mu=mu, sd=sigma, observed=Y)
+            Y_obs = Normal("Y_obs", mu=mu, sd=sigma, observed=Y)
         self.distributions = [alpha, sigma, mu, b, Y_obs]
         self.expected = (
-            r'$\text{alpha} \sim \text{Normal}(\mathit{mu}=0,~\mathit{sd}=10.0)$',
-            r'$\text{sigma} \sim \text{HalfNormal}(\mathit{sd}=1.0)$',
-            r'$\text{mu} \sim \text{Deterministic}(\text{alpha},~\text{Constant},~\text{beta})$',
-            r'$\text{beta} \sim \text{Normal}(\mathit{mu}=0,~\mathit{sd}=10.0)$',
-            r'$\text{Y_obs} \sim \text{Normal}(\mathit{mu}=\text{mu},~\mathit{sd}=f(\text{sigma}))$'
+            r"$\text{alpha} \sim \text{Normal}(\mathit{mu}=0,~\mathit{sd}=10.0)$",
+            r"$\text{sigma} \sim \text{HalfNormal}(\mathit{sd}=1.0)$",
+            r"$\text{mu} \sim \text{Deterministic}(\text{alpha},~\text{Constant},~\text{beta})$",
+            r"$\text{beta} \sim \text{Normal}(\mathit{mu}=0,~\mathit{sd}=10.0)$",
+            r"$\text{Y_obs} \sim \text{Normal}(\mathit{mu}=\text{mu},~\mathit{sd}=f(\text{sigma}))$",
         )
 
     def test__repr_latex_(self):
@@ -1316,7 +1751,7 @@ def test__repr_latex_(self):
         model_tex = self.model._repr_latex_()
 
         for tex in self.expected:  # make sure each variable is in the model
-            for segment in tex.strip('$').split(r'\sim'):
+            for segment in tex.strip("$").split(r"\sim"):
                 assert segment in model_tex
 
     def test___latex__(self):
@@ -1327,9 +1762,9 @@ def test___latex__(self):
 
 def test_discrete_trafo():
     with pytest.raises(ValueError) as err:
-        Binomial.dist(n=5, p=0.5, transform='log')
-    err.match('Transformations for discrete distributions')
+        Binomial.dist(n=5, p=0.5, transform="log")
+    err.match("Transformations for discrete distributions")
     with Model():
         with pytest.raises(ValueError) as err:
-            Binomial('a', n=5, p=0.5, transform='log')
-        err.match('Transformations for discrete distributions')
+            Binomial("a", n=5, p=0.5, transform="log")
+        err.match("Transformations for discrete distributions")
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 9bc2841624..6a15f81aed 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -13,15 +13,38 @@
 from pymc3.distributions.distribution import draw_values
 from .helpers import SeededTest
 from .test_distributions import (
-    build_model, Domain, product, R, Rplus, Rplusbig, Rplusdunif,
-    Unit, Nat, NatSmall, I, Simplex, Vector, PdMatrix,
-    PdMatrixChol, PdMatrixCholUpper, RealMatrix, RandomPdMatrix
+    build_model,
+    Domain,
+    product,
+    R,
+    Rplus,
+    Rplusbig,
+    Rplusdunif,
+    Unit,
+    Nat,
+    NatSmall,
+    I,
+    Simplex,
+    Vector,
+    PdMatrix,
+    PdMatrixChol,
+    PdMatrixCholUpper,
+    RealMatrix,
+    RandomPdMatrix,
 )
 
 
-def pymc3_random(dist, paramdomains, ref_rand, valuedomain=Domain([0]),
-                 size=10000, alpha=0.05, fails=10, extra_args=None,
-                 model_args=None):
+def pymc3_random(
+    dist,
+    paramdomains,
+    ref_rand,
+    valuedomain=Domain([0]),
+    size=10000,
+    alpha=0.05,
+    fails=10,
+    extra_args=None,
+    model_args=None,
+):
     if model_args is None:
         model_args = {}
     model = build_model(dist, valuedomain, paramdomains, extra_args)
@@ -34,17 +57,22 @@ def pymc3_random(dist, paramdomains, ref_rand, valuedomain=Domain([0]),
         # a certain number of times. Crude, but necessary.
         f = fails
         while p <= alpha and f > 0:
-            s0 = model.named_vars['value'].random(size=size, point=pt)
+            s0 = model.named_vars["value"].random(size=size, point=pt)
             s1 = ref_rand(size=size, **pt)
-            _, p = st.ks_2samp(np.atleast_1d(s0).flatten(),
-                               np.atleast_1d(s1).flatten())
+            _, p = st.ks_2samp(np.atleast_1d(s0).flatten(), np.atleast_1d(s1).flatten())
             f -= 1
         assert p > alpha, str(pt)
 
 
-def pymc3_random_discrete(dist, paramdomains,
-                          valuedomain=Domain([0]), ref_rand=None,
-                          size=100000, alpha=0.05, fails=20):
+def pymc3_random_discrete(
+    dist,
+    paramdomains,
+    valuedomain=Domain([0]),
+    ref_rand=None,
+    size=100000,
+    alpha=0.05,
+    fails=20,
+):
     model = build_model(dist, valuedomain, paramdomains)
     domains = paramdomains.copy()
     for pt in product(domains, n_samples=100):
@@ -54,7 +82,7 @@ def pymc3_random_discrete(dist, paramdomains,
         # a certain number of times.
         f = fails
         while p <= alpha and f > 0:
-            o = model.named_vars['value'].random(size=size, point=pt)
+            o = model.named_vars["value"].random(size=size, point=pt)
             e = ref_rand(size=size, **pt)
             o = np.atleast_1d(o).flatten()
             e = np.atleast_1d(e).flatten()
@@ -64,7 +92,7 @@ def pymc3_random_discrete(dist, paramdomains,
                 expected[e] = (observed.get(e, 0), expected[e])
             k = np.array([v for v in expected.values()])
             if np.all(k[:, 0] == k[:, 1]):
-                p = 1.
+                p = 1.0
             else:
                 _, p = st.chisquare(k[:, 0], k[:, 1])
             f -= 1
@@ -74,23 +102,23 @@ def pymc3_random_discrete(dist, paramdomains,
 class TestDrawValues(SeededTest):
     def test_draw_scalar_parameters(self):
         with pm.Model():
-            y = pm.Normal('y1', mu=0., sd=1.)
+            y = pm.Normal("y1", mu=0.0, sd=1.0)
             mu, tau = draw_values([y.distribution.mu, y.distribution.tau])
         npt.assert_almost_equal(mu, 0)
         npt.assert_almost_equal(tau, 1)
 
     def test_draw_dependencies(self):
         with pm.Model():
-            x = pm.Normal('x', mu=0., sd=1.)
-            exp_x = pm.Deterministic('exp_x', pm.math.exp(x))
+            x = pm.Normal("x", mu=0.0, sd=1.0)
+            exp_x = pm.Deterministic("exp_x", pm.math.exp(x))
 
         x, exp_x = draw_values([x, exp_x])
         npt.assert_almost_equal(np.exp(x), exp_x)
 
     def test_draw_order(self):
         with pm.Model():
-            x = pm.Normal('x', mu=0., sd=1.)
-            exp_x = pm.Deterministic('exp_x', pm.math.exp(x))
+            x = pm.Normal("x", mu=0.0, sd=1.0)
+            exp_x = pm.Deterministic("exp_x", pm.math.exp(x))
 
         # Need to draw x before drawing log_x
         exp_x, x = draw_values([exp_x, x])
@@ -98,19 +126,20 @@ def test_draw_order(self):
 
     def test_draw_point_replacement(self):
         with pm.Model():
-            mu = pm.Normal('mu', mu=0., tau=1e-3)
-            sigma = pm.Gamma('sigma', alpha=1., beta=1., transform=None)
-            y = pm.Normal('y', mu=mu, sd=sigma)
-            mu2, tau2 = draw_values([y.distribution.mu, y.distribution.tau],
-                                                     point={'mu': 5., 'sigma': 2.})
+            mu = pm.Normal("mu", mu=0.0, tau=1e-3)
+            sigma = pm.Gamma("sigma", alpha=1.0, beta=1.0, transform=None)
+            y = pm.Normal("y", mu=mu, sd=sigma)
+            mu2, tau2 = draw_values(
+                [y.distribution.mu, y.distribution.tau], point={"mu": 5.0, "sigma": 2.0}
+            )
         npt.assert_almost_equal(mu2, 5)
-        npt.assert_almost_equal(tau2, 1 / 2.**2)
+        npt.assert_almost_equal(tau2, 1 / 2.0 ** 2)
 
     def test_random_sample_returns_nd_array(self):
         with pm.Model():
-            mu = pm.Normal('mu', mu=0., tau=1e-3)
-            sigma = pm.Gamma('sigma', alpha=1., beta=1., transform=None)
-            y = pm.Normal('y', mu=mu, sd=sigma)
+            mu = pm.Normal("mu", mu=0.0, tau=1e-3)
+            sigma = pm.Gamma("sigma", alpha=1.0, beta=1.0, transform=None)
+            y = pm.Normal("y", mu=mu, sd=sigma)
             mu, tau = draw_values([y.distribution.mu, y.distribution.tau])
         assert isinstance(mu, np.ndarray)
         assert isinstance(tau, np.ndarray)
@@ -126,8 +155,10 @@ def setup_method(self, *args, **kwargs):
 
         def get_random_variable(self, shape, with_vector_params=False, name=None):
             if with_vector_params:
-                params = {key: value * np.ones(self.shape, dtype=np.dtype(type(value))) for
-                          key, value in self.params.items()}
+                params = {
+                    key: value * np.ones(self.shape, dtype=np.dtype(type(value)))
+                    for key, value in self.params.items()
+                }
             else:
                 params = self.params
             if name is None:
@@ -136,7 +167,9 @@ def get_random_variable(self, shape, with_vector_params=False, name=None):
                 if shape is None:
                     return self.distribution(name, transform=None, **params)
                 else:
-                    return self.distribution(name, shape=shape, transform=None, **params)
+                    return self.distribution(
+                        name, shape=shape, transform=None, **params
+                    )
 
         @staticmethod
         def sample_random_variable(random_variable, size):
@@ -145,17 +178,17 @@ def sample_random_variable(random_variable, size):
             except AttributeError:
                 return random_variable.distribution.random(size=size)
 
-        @pytest.mark.parametrize('size', [None, 5, (4, 5)], ids=str)
+        @pytest.mark.parametrize("size", [None, 5, (4, 5)], ids=str)
         def test_scalar_parameter_shape(self, size):
             rv = self.get_random_variable(None)
             if size is None:
-                expected = 1,
+                expected = (1,)
             else:
                 expected = np.atleast_1d(size).tolist()
             actual = np.atleast_1d(self.sample_random_variable(rv, size)).shape
             assert tuple(expected) == actual
 
-        @pytest.mark.parametrize('size', [None, 5, (4, 5)], ids=str)
+        @pytest.mark.parametrize("size", [None, 5, (4, 5)], ids=str)
         def test_scalar_shape(self, size):
             shape = 10
             rv = self.get_random_variable(shape)
@@ -168,7 +201,7 @@ def test_scalar_shape(self, size):
             actual = np.atleast_1d(self.sample_random_variable(rv, size)).shape
             assert tuple(expected) == actual
 
-        @pytest.mark.parametrize('size', [None, 5, (4, 5)], ids=str)
+        @pytest.mark.parametrize("size", [None, 5, (4, 5)], ids=str)
         def test_parameters_1d_shape(self, size):
             rv = self.get_random_variable(self.shape, with_vector_params=True)
             if size is None:
@@ -179,7 +212,7 @@ def test_parameters_1d_shape(self, size):
             actual = self.sample_random_variable(rv, size).shape
             assert tuple(expected) == actual
 
-        @pytest.mark.parametrize('size', [None, 5, (4, 5)], ids=str)
+        @pytest.mark.parametrize("size", [None, 5, (4, 5)], ids=str)
         def test_broadcast_shape(self, size):
             broadcast_shape = (2 * self.shape, self.shape)
             rv = self.get_random_variable(broadcast_shape, with_vector_params=True)
@@ -191,11 +224,13 @@ def test_broadcast_shape(self, size):
             actual = np.atleast_1d(self.sample_random_variable(rv, size)).shape
             assert tuple(expected) == actual
 
-        @pytest.mark.parametrize('shape', [(), (1,), (1, 1), (1, 2), (10, 10, 1), (10, 10, 2)], ids=str)
+        @pytest.mark.parametrize(
+            "shape", [(), (1,), (1, 1), (1, 2), (10, 10, 1), (10, 10, 2)], ids=str
+        )
         def test_different_shapes_and_sample_sizes(self, shape):
             prefix = self.distribution.__name__
 
-            rv = self.get_random_variable(shape, name='%s_%s' % (prefix, shape))
+            rv = self.get_random_variable(shape, name="%s_%s" % (prefix, shape))
             for size in (None, 1, 5, (4, 5)):
                 if size is None:
                     s = []
@@ -215,191 +250,199 @@ def test_different_shapes_and_sample_sizes(self, shape):
 
 class TestNormal(BaseTestCases.BaseTestCase):
     distribution = pm.Normal
-    params = {'mu': 0., 'tau': 1.}
+    params = {"mu": 0.0, "tau": 1.0}
+
 
 class TestTruncatedNormal(BaseTestCases.BaseTestCase):
     distribution = pm.TruncatedNormal
-    params = {'mu': 0., 'tau': 1., 'lower':-0.5, 'upper':0.5}
+    params = {"mu": 0.0, "tau": 1.0, "lower": -0.5, "upper": 0.5}
+
 
 class TestSkewNormal(BaseTestCases.BaseTestCase):
     distribution = pm.SkewNormal
-    params = {'mu': 0., 'sd': 1., 'alpha': 5.}
+    params = {"mu": 0.0, "sd": 1.0, "alpha": 5.0}
 
 
 class TestHalfNormal(BaseTestCases.BaseTestCase):
     distribution = pm.HalfNormal
-    params = {'tau': 1.}
+    params = {"tau": 1.0}
 
 
 class TestUniform(BaseTestCases.BaseTestCase):
     distribution = pm.Uniform
-    params = {'lower': 0., 'upper': 1.}
+    params = {"lower": 0.0, "upper": 1.0}
 
 
 class TestTriangular(BaseTestCases.BaseTestCase):
     distribution = pm.Triangular
-    params = {'c': 0.5, 'lower': 0., 'upper': 1.}
+    params = {"c": 0.5, "lower": 0.0, "upper": 1.0}
 
 
 class TestWald(BaseTestCases.BaseTestCase):
     distribution = pm.Wald
-    params = {'mu': 1., 'lam': 1., 'alpha': 0.}
+    params = {"mu": 1.0, "lam": 1.0, "alpha": 0.0}
 
 
 class TestBeta(BaseTestCases.BaseTestCase):
     distribution = pm.Beta
-    params = {'alpha': 1., 'beta': 1.}
+    params = {"alpha": 1.0, "beta": 1.0}
 
 
 class TestKumaraswamy(BaseTestCases.BaseTestCase):
     distribution = pm.Kumaraswamy
-    params = {'a': 1., 'b': 1.}
+    params = {"a": 1.0, "b": 1.0}
 
 
 class TestExponential(BaseTestCases.BaseTestCase):
     distribution = pm.Exponential
-    params = {'lam': 1.}
+    params = {"lam": 1.0}
 
 
 class TestLaplace(BaseTestCases.BaseTestCase):
     distribution = pm.Laplace
-    params = {'mu': 1., 'b': 1.}
+    params = {"mu": 1.0, "b": 1.0}
 
 
 class TestLognormal(BaseTestCases.BaseTestCase):
     distribution = pm.Lognormal
-    params = {'mu': 1., 'tau': 1.}
+    params = {"mu": 1.0, "tau": 1.0}
 
 
 class TestStudentT(BaseTestCases.BaseTestCase):
     distribution = pm.StudentT
-    params = {'nu': 5., 'mu': 0., 'lam': 1.}
+    params = {"nu": 5.0, "mu": 0.0, "lam": 1.0}
 
 
 class TestPareto(BaseTestCases.BaseTestCase):
     distribution = pm.Pareto
-    params = {'alpha': 0.5, 'm': 1.}
+    params = {"alpha": 0.5, "m": 1.0}
 
 
 class TestCauchy(BaseTestCases.BaseTestCase):
     distribution = pm.Cauchy
-    params = {'alpha': 1., 'beta': 1.}
+    params = {"alpha": 1.0, "beta": 1.0}
 
 
 class TestHalfCauchy(BaseTestCases.BaseTestCase):
     distribution = pm.HalfCauchy
-    params = {'beta': 1.}
+    params = {"beta": 1.0}
 
 
 class TestGamma(BaseTestCases.BaseTestCase):
     distribution = pm.Gamma
-    params = {'alpha': 1., 'beta': 1.}
+    params = {"alpha": 1.0, "beta": 1.0}
 
 
 class TestInverseGamma(BaseTestCases.BaseTestCase):
     distribution = pm.InverseGamma
-    params = {'alpha': 0.5, 'beta': 0.5}
+    params = {"alpha": 0.5, "beta": 0.5}
 
 
 class TestChiSquared(BaseTestCases.BaseTestCase):
     distribution = pm.ChiSquared
-    params = {'nu': 2.}
+    params = {"nu": 2.0}
 
 
 class TestWeibull(BaseTestCases.BaseTestCase):
     distribution = pm.Weibull
-    params = {'alpha': 1., 'beta': 1.}
+    params = {"alpha": 1.0, "beta": 1.0}
 
 
 class TestExGaussian(BaseTestCases.BaseTestCase):
     distribution = pm.ExGaussian
-    params = {'mu': 0., 'sigma': 1., 'nu': 1.}
+    params = {"mu": 0.0, "sigma": 1.0, "nu": 1.0}
 
 
 class TestVonMises(BaseTestCases.BaseTestCase):
     distribution = pm.VonMises
-    params = {'mu': 0., 'kappa': 1.}
+    params = {"mu": 0.0, "kappa": 1.0}
 
 
 class TestGumbel(BaseTestCases.BaseTestCase):
     distribution = pm.Gumbel
-    params = {'mu': 0., 'beta': 1.}
+    params = {"mu": 0.0, "beta": 1.0}
 
 
 class TestLogistic(BaseTestCases.BaseTestCase):
     distribution = pm.Logistic
-    params = {'mu': 0., 's': 1.}
+    params = {"mu": 0.0, "s": 1.0}
 
 
 class TestLogitNormal(BaseTestCases.BaseTestCase):
     distribution = pm.LogitNormal
-    params = {'mu': 0., 'sd': 1.}
+    params = {"mu": 0.0, "sd": 1.0}
 
 
 class TestBinomial(BaseTestCases.BaseTestCase):
     distribution = pm.Binomial
-    params = {'n': 5, 'p': 0.5}
+    params = {"n": 5, "p": 0.5}
 
 
 class TestBetaBinomial(BaseTestCases.BaseTestCase):
     distribution = pm.BetaBinomial
-    params = {'n': 5, 'alpha': 1., 'beta': 1.}
+    params = {"n": 5, "alpha": 1.0, "beta": 1.0}
 
 
 class TestBernoulli(BaseTestCases.BaseTestCase):
     distribution = pm.Bernoulli
-    params = {'p': 0.5}
+    params = {"p": 0.5}
 
 
 class TestDiscreteWeibull(BaseTestCases.BaseTestCase):
     distribution = pm.DiscreteWeibull
-    params = {'q': 0.25, 'beta': 2.}
+    params = {"q": 0.25, "beta": 2.0}
 
 
 class TestPoisson(BaseTestCases.BaseTestCase):
     distribution = pm.Poisson
-    params = {'mu': 1.}
+    params = {"mu": 1.0}
 
 
 class TestNegativeBinomial(BaseTestCases.BaseTestCase):
     distribution = pm.NegativeBinomial
-    params = {'mu': 1., 'alpha': 1.}
+    params = {"mu": 1.0, "alpha": 1.0}
 
 
 class TestConstant(BaseTestCases.BaseTestCase):
     distribution = pm.Constant
-    params = {'c': 3}
+    params = {"c": 3}
 
 
 class TestZeroInflatedPoisson(BaseTestCases.BaseTestCase):
     distribution = pm.ZeroInflatedPoisson
-    params = {'theta': 1., 'psi': 0.3}
+    params = {"theta": 1.0, "psi": 0.3}
 
 
 class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase):
     distribution = pm.ZeroInflatedNegativeBinomial
-    params = {'mu': 1., 'alpha': 1., 'psi': 0.3}
+    params = {"mu": 1.0, "alpha": 1.0, "psi": 0.3}
+
 
 class TestZeroInflatedBinomial(BaseTestCases.BaseTestCase):
     distribution = pm.ZeroInflatedBinomial
-    params = {'n': 10, 'p': 0.6, 'psi': 0.3}
+    params = {"n": 10, "p": 0.6, "psi": 0.3}
+
 
 class TestDiscreteUniform(BaseTestCases.BaseTestCase):
     distribution = pm.DiscreteUniform
-    params = {'lower': 0., 'upper': 10.}
+    params = {"lower": 0.0, "upper": 10.0}
 
 
 class TestGeometric(BaseTestCases.BaseTestCase):
     distribution = pm.Geometric
-    params = {'p': 0.5}
+    params = {"p": 0.5}
 
 
 class TestCategorical(BaseTestCases.BaseTestCase):
     distribution = pm.Categorical
-    params = {'p': np.ones(BaseTestCases.BaseTestCase.shape)}
+    params = {"p": np.ones(BaseTestCases.BaseTestCase.shape)}
 
-    def get_random_variable(self, shape, with_vector_params=False, **kwargs):  # don't transform categories
-        return super(TestCategorical, self).get_random_variable(shape, with_vector_params=False, **kwargs)
+    def get_random_variable(
+        self, shape, with_vector_params=False, **kwargs
+    ):  # don't transform categories
+        return super(TestCategorical, self).get_random_variable(
+            shape, with_vector_params=False, **kwargs
+        )
 
     def test_probability_vector_shape(self):
         """Check that if a 2d array of probabilities are passed to categorical correct shape is returned"""
@@ -414,173 +457,235 @@ def test_bounded(self):
 
         def ref_rand(size, tau):
             return -st.halfnorm.rvs(size=size, loc=0, scale=tau ** -0.5)
-        pymc3_random(BoundedNormal, {'tau': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(BoundedNormal, {"tau": Rplus}, ref_rand=ref_rand)
 
     def test_uniform(self):
         def ref_rand(size, lower, upper):
             return st.uniform.rvs(size=size, loc=lower, scale=upper - lower)
 
-        pymc3_random(pm.Uniform, {'lower': -Rplus, 'upper': Rplus}, ref_rand=ref_rand)
+        pymc3_random(pm.Uniform, {"lower": -Rplus, "upper": Rplus}, ref_rand=ref_rand)
 
     def test_normal(self):
         def ref_rand(size, mu, sd):
             return st.norm.rvs(size=size, loc=mu, scale=sd)
-        pymc3_random(pm.Normal, {'mu': R, 'sd': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(pm.Normal, {"mu": R, "sd": Rplus}, ref_rand=ref_rand)
 
     def test_truncated_normal(self):
         def ref_rand(size, mu, sd, lower, upper):
-            return st.truncnorm.rvs((lower-mu)/sd, (upper-mu)/sd, size=size, loc=mu, scale=sd)
-        pymc3_random(pm.TruncatedNormal, {'mu': R, 'sd': Rplusbig, 'lower':-Rplusbig, 'upper':Rplusbig},
-                     ref_rand=ref_rand)
+            return st.truncnorm.rvs(
+                (lower - mu) / sd, (upper - mu) / sd, size=size, loc=mu, scale=sd
+            )
+
+        pymc3_random(
+            pm.TruncatedNormal,
+            {"mu": R, "sd": Rplusbig, "lower": -Rplusbig, "upper": Rplusbig},
+            ref_rand=ref_rand,
+        )
 
     def test_skew_normal(self):
         def ref_rand(size, alpha, mu, sd):
             return st.skewnorm.rvs(size=size, a=alpha, loc=mu, scale=sd)
-        pymc3_random(pm.SkewNormal, {'mu': R, 'sd': Rplus, 'alpha': R}, ref_rand=ref_rand)
+
+        pymc3_random(
+            pm.SkewNormal, {"mu": R, "sd": Rplus, "alpha": R}, ref_rand=ref_rand
+        )
 
     def test_half_normal(self):
         def ref_rand(size, tau):
             return st.halfnorm.rvs(size=size, loc=0, scale=tau ** -0.5)
-        pymc3_random(pm.HalfNormal, {'tau': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(pm.HalfNormal, {"tau": Rplus}, ref_rand=ref_rand)
 
     def test_wald(self):
         # Cannot do anything too exciting as scipy wald is a
         # location-scale model of the *standard* wald with mu=1 and lam=1
         def ref_rand(size, mu, lam, alpha):
             return st.wald.rvs(size=size, loc=alpha)
-        pymc3_random(pm.Wald,
-                     {'mu': Domain([1., 1., 1.]), 'lam': Domain(
-                         [1., 1., 1.]), 'alpha': Rplus},
-                     ref_rand=ref_rand)
+
+        pymc3_random(
+            pm.Wald,
+            {
+                "mu": Domain([1.0, 1.0, 1.0]),
+                "lam": Domain([1.0, 1.0, 1.0]),
+                "alpha": Rplus,
+            },
+            ref_rand=ref_rand,
+        )
 
     def test_beta(self):
         def ref_rand(size, alpha, beta):
             return st.beta.rvs(a=alpha, b=beta, size=size)
-        pymc3_random(pm.Beta, {'alpha': Rplus, 'beta': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(pm.Beta, {"alpha": Rplus, "beta": Rplus}, ref_rand=ref_rand)
 
     def test_exponential(self):
         def ref_rand(size, lam):
-            return nr.exponential(scale=1. / lam, size=size)
-        pymc3_random(pm.Exponential, {'lam': Rplus}, ref_rand=ref_rand)
+            return nr.exponential(scale=1.0 / lam, size=size)
+
+        pymc3_random(pm.Exponential, {"lam": Rplus}, ref_rand=ref_rand)
 
     def test_laplace(self):
         def ref_rand(size, mu, b):
             return st.laplace.rvs(mu, b, size=size)
-        pymc3_random(pm.Laplace, {'mu': R, 'b': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(pm.Laplace, {"mu": R, "b": Rplus}, ref_rand=ref_rand)
 
     def test_lognormal(self):
         def ref_rand(size, mu, tau):
-            return np.exp(mu + (tau ** -0.5) * st.norm.rvs(loc=0., scale=1., size=size))
-        pymc3_random(pm.Lognormal, {'mu': R, 'tau': Rplusbig}, ref_rand=ref_rand)
+            return np.exp(
+                mu + (tau ** -0.5) * st.norm.rvs(loc=0.0, scale=1.0, size=size)
+            )
+
+        pymc3_random(pm.Lognormal, {"mu": R, "tau": Rplusbig}, ref_rand=ref_rand)
 
     def test_student_t(self):
         def ref_rand(size, nu, mu, lam):
-            return st.t.rvs(nu, mu, lam**-.5, size=size)
-        pymc3_random(pm.StudentT, {'nu': Rplus, 'mu': R, 'lam': Rplus}, ref_rand=ref_rand)
+            return st.t.rvs(nu, mu, lam ** -0.5, size=size)
+
+        pymc3_random(
+            pm.StudentT, {"nu": Rplus, "mu": R, "lam": Rplus}, ref_rand=ref_rand
+        )
 
     def test_cauchy(self):
         def ref_rand(size, alpha, beta):
             return st.cauchy.rvs(alpha, beta, size=size)
-        pymc3_random(pm.Cauchy, {'alpha': R, 'beta': Rplusbig}, ref_rand=ref_rand)
+
+        pymc3_random(pm.Cauchy, {"alpha": R, "beta": Rplusbig}, ref_rand=ref_rand)
 
     def test_half_cauchy(self):
         def ref_rand(size, beta):
             return st.halfcauchy.rvs(scale=beta, size=size)
-        pymc3_random(pm.HalfCauchy, {'beta': Rplusbig}, ref_rand=ref_rand)
+
+        pymc3_random(pm.HalfCauchy, {"beta": Rplusbig}, ref_rand=ref_rand)
 
     def test_gamma_alpha_beta(self):
         def ref_rand(size, alpha, beta):
-            return st.gamma.rvs(alpha, scale=1. / beta, size=size)
-        pymc3_random(pm.Gamma, {'alpha': Rplusbig, 'beta': Rplusbig}, ref_rand=ref_rand)
+            return st.gamma.rvs(alpha, scale=1.0 / beta, size=size)
+
+        pymc3_random(pm.Gamma, {"alpha": Rplusbig, "beta": Rplusbig}, ref_rand=ref_rand)
 
     def test_gamma_mu_sd(self):
         def ref_rand(size, mu, sd):
-            return st.gamma.rvs(mu**2 / sd**2, scale=sd ** 2 / mu, size=size)
-        pymc3_random(pm.Gamma, {'mu': Rplusbig, 'sd': Rplusbig}, ref_rand=ref_rand)
+            return st.gamma.rvs(mu ** 2 / sd ** 2, scale=sd ** 2 / mu, size=size)
+
+        pymc3_random(pm.Gamma, {"mu": Rplusbig, "sd": Rplusbig}, ref_rand=ref_rand)
 
     def test_inverse_gamma(self):
         def ref_rand(size, alpha, beta):
             return st.invgamma.rvs(a=alpha, scale=beta, size=size)
-        pymc3_random(pm.InverseGamma, {'alpha': Rplus, 'beta': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(
+            pm.InverseGamma, {"alpha": Rplus, "beta": Rplus}, ref_rand=ref_rand
+        )
 
     def test_pareto(self):
         def ref_rand(size, alpha, m):
             return st.pareto.rvs(alpha, scale=m, size=size)
-        pymc3_random(pm.Pareto, {'alpha': Rplusbig, 'm': Rplusbig}, ref_rand=ref_rand)
+
+        pymc3_random(pm.Pareto, {"alpha": Rplusbig, "m": Rplusbig}, ref_rand=ref_rand)
 
     def test_ex_gaussian(self):
         def ref_rand(size, mu, sigma, nu):
             return nr.normal(mu, sigma, size=size) + nr.exponential(scale=nu, size=size)
-        pymc3_random(pm.ExGaussian, {'mu': R, 'sigma': Rplus, 'nu': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(
+            pm.ExGaussian, {"mu": R, "sigma": Rplus, "nu": Rplus}, ref_rand=ref_rand
+        )
 
     def test_vonmises(self):
         def ref_rand(size, mu, kappa):
             return st.vonmises.rvs(size=size, loc=mu, kappa=kappa)
-        pymc3_random(pm.VonMises, {'mu': R, 'kappa': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(pm.VonMises, {"mu": R, "kappa": Rplus}, ref_rand=ref_rand)
 
     def test_flat(self):
         with pm.Model():
-            f = pm.Flat('f')
+            f = pm.Flat("f")
             with pytest.raises(ValueError):
                 f.random(1)
 
     def test_half_flat(self):
         with pm.Model():
-            f = pm.HalfFlat('f')
+            f = pm.HalfFlat("f")
             with pytest.raises(ValueError):
                 f.random(1)
 
     def test_binomial(self):
-        pymc3_random_discrete(pm.Binomial, {'n': Nat, 'p': Unit}, ref_rand=st.binom.rvs)
+        pymc3_random_discrete(pm.Binomial, {"n": Nat, "p": Unit}, ref_rand=st.binom.rvs)
 
     def test_beta_binomial(self):
-        pymc3_random_discrete(pm.BetaBinomial, {'n': Nat, 'alpha': Rplus, 'beta': Rplus},
-                              ref_rand=self._beta_bin)
+        pymc3_random_discrete(
+            pm.BetaBinomial,
+            {"n": Nat, "alpha": Rplus, "beta": Rplus},
+            ref_rand=self._beta_bin,
+        )
 
     def _beta_bin(self, n, alpha, beta, size=None):
         return st.binom.rvs(n, st.beta.rvs(a=alpha, b=beta, size=size))
 
     def test_bernoulli(self):
-        pymc3_random_discrete(pm.Bernoulli, {'p': Unit},
-                              ref_rand=lambda size, p=None: st.bernoulli.rvs(p, size=size))
+        pymc3_random_discrete(
+            pm.Bernoulli,
+            {"p": Unit},
+            ref_rand=lambda size, p=None: st.bernoulli.rvs(p, size=size),
+        )
 
     def test_poisson(self):
-        pymc3_random_discrete(pm.Poisson, {'mu': Rplusbig}, size=500, ref_rand=st.poisson.rvs)
+        pymc3_random_discrete(
+            pm.Poisson, {"mu": Rplusbig}, size=500, ref_rand=st.poisson.rvs
+        )
 
     def test_negative_binomial(self):
         def ref_rand(size, alpha, mu):
             return st.nbinom.rvs(alpha, alpha / (mu + alpha), size=size)
-        pymc3_random_discrete(pm.NegativeBinomial, {'mu': Rplusbig, 'alpha': Rplusbig},
-                              size=100, fails=50, ref_rand=ref_rand)
+
+        pymc3_random_discrete(
+            pm.NegativeBinomial,
+            {"mu": Rplusbig, "alpha": Rplusbig},
+            size=100,
+            fails=50,
+            ref_rand=ref_rand,
+        )
 
     def test_geometric(self):
-        pymc3_random_discrete(pm.Geometric, {'p': Unit}, size=500, fails=50, ref_rand=nr.geometric)
+        pymc3_random_discrete(
+            pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric
+        )
 
     def test_discrete_uniform(self):
         def ref_rand(size, lower, upper):
             return st.randint.rvs(lower, upper + 1, size=size)
-        pymc3_random_discrete(pm.DiscreteUniform, {'lower': -NatSmall, 'upper': NatSmall},
-                              ref_rand=ref_rand)
+
+        pymc3_random_discrete(
+            pm.DiscreteUniform,
+            {"lower": -NatSmall, "upper": NatSmall},
+            ref_rand=ref_rand,
+        )
 
     def test_discrete_weibull(self):
         def ref_rand(size, q, beta):
             u = np.random.uniform(size=size)
 
-            return np.ceil(np.power(np.log(1 - u) / np.log(q), 1. / beta)) - 1
+            return np.ceil(np.power(np.log(1 - u) / np.log(q), 1.0 / beta)) - 1
 
-        pymc3_random_discrete(pm.DiscreteWeibull, {'q': Unit, 'beta': Rplusdunif},
-                              ref_rand=ref_rand)
+        pymc3_random_discrete(
+            pm.DiscreteWeibull, {"q": Unit, "beta": Rplusdunif}, ref_rand=ref_rand
+        )
 
-    @pytest.mark.parametrize('s', [2, 3, 4])
+    @pytest.mark.parametrize("s", [2, 3, 4])
     def test_categorical_random(self, s):
         def ref_rand(size, p):
             return nr.choice(np.arange(p.shape[0]), p=p, size=size)
-        pymc3_random_discrete(pm.Categorical, {'p': Simplex(s)}, ref_rand=ref_rand)
+
+        pymc3_random_discrete(pm.Categorical, {"p": Simplex(s)}, ref_rand=ref_rand)
 
     def test_constant_dist(self):
         def ref_rand(size, c):
             return c * np.ones(size, dtype=int)
-        pymc3_random_discrete(pm.Constant, {'c': I}, ref_rand=ref_rand)
+
+        pymc3_random_discrete(pm.Constant, {"c": I}, ref_rand=ref_rand)
 
     def test_mv_normal(self):
         def ref_rand(size, mu, cov):
@@ -596,41 +701,82 @@ def ref_rand_uchol(size, mu, chol):
             return ref_rand(size, mu, np.dot(chol.T, chol))
 
         for n in [2, 3]:
-            pymc3_random(pm.MvNormal, {'mu': Vector(R, n), 'cov': PdMatrix(n)},
-                         size=100, valuedomain=Vector(R, n), ref_rand=ref_rand)
-            pymc3_random(pm.MvNormal, {'mu': Vector(R, n), 'tau': PdMatrix(n)},
-                         size=100, valuedomain=Vector(R, n), ref_rand=ref_rand_tau)
-            pymc3_random(pm.MvNormal, {'mu': Vector(R, n), 'chol': PdMatrixChol(n)},
-                         size=100, valuedomain=Vector(R, n), ref_rand=ref_rand_chol)
             pymc3_random(
                 pm.MvNormal,
-                {'mu': Vector(R, n), 'chol': PdMatrixCholUpper(n)},
-                size=100, valuedomain=Vector(R, n), ref_rand=ref_rand_uchol,
-                extra_args={'lower': False}
+                {"mu": Vector(R, n), "cov": PdMatrix(n)},
+                size=100,
+                valuedomain=Vector(R, n),
+                ref_rand=ref_rand,
+            )
+            pymc3_random(
+                pm.MvNormal,
+                {"mu": Vector(R, n), "tau": PdMatrix(n)},
+                size=100,
+                valuedomain=Vector(R, n),
+                ref_rand=ref_rand_tau,
+            )
+            pymc3_random(
+                pm.MvNormal,
+                {"mu": Vector(R, n), "chol": PdMatrixChol(n)},
+                size=100,
+                valuedomain=Vector(R, n),
+                ref_rand=ref_rand_chol,
+            )
+            pymc3_random(
+                pm.MvNormal,
+                {"mu": Vector(R, n), "chol": PdMatrixCholUpper(n)},
+                size=100,
+                valuedomain=Vector(R, n),
+                ref_rand=ref_rand_uchol,
+                extra_args={"lower": False},
             )
 
     def test_matrix_normal(self):
         def ref_rand(size, mu, rowcov, colcov):
-            return st.matrix_normal.rvs(mean=mu, rowcov=rowcov, colcov=colcov, size=size)
+            return st.matrix_normal.rvs(
+                mean=mu, rowcov=rowcov, colcov=colcov, size=size
+            )
 
         # def ref_rand_tau(size, mu, tau):
         #     return ref_rand(size, mu, linalg.inv(tau))
 
         def ref_rand_chol(size, mu, rowchol, colchol):
-            return ref_rand(size, mu, rowcov=np.dot(rowchol, rowchol.T),
-                            colcov=np.dot(colchol, colchol.T))
+            return ref_rand(
+                size,
+                mu,
+                rowcov=np.dot(rowchol, rowchol.T),
+                colcov=np.dot(colchol, colchol.T),
+            )
 
         def ref_rand_uchol(size, mu, rowchol, colchol):
-            return ref_rand(size, mu, rowcov=np.dot(rowchol.T, rowchol),
-                            colcov=np.dot(colchol.T, colchol))
+            return ref_rand(
+                size,
+                mu,
+                rowcov=np.dot(rowchol.T, rowchol),
+                colcov=np.dot(colchol.T, colchol),
+            )
 
         for n in [2, 3]:
-            pymc3_random(pm.MatrixNormal, {'mu': RealMatrix(n, n), 'rowcov': PdMatrix(n), 'colcov': PdMatrix(n)},
-                         size=n, valuedomain=RealMatrix(n, n), ref_rand=ref_rand)
+            pymc3_random(
+                pm.MatrixNormal,
+                {"mu": RealMatrix(n, n), "rowcov": PdMatrix(n), "colcov": PdMatrix(n)},
+                size=n,
+                valuedomain=RealMatrix(n, n),
+                ref_rand=ref_rand,
+            )
             # pymc3_random(pm.MatrixNormal, {'mu': RealMatrix(n, n), 'tau': PdMatrix(n)},
             #              size=n, valuedomain=RealMatrix(n, n), ref_rand=ref_rand_tau)
-            pymc3_random(pm.MatrixNormal, {'mu': RealMatrix(n, n), 'rowchol': PdMatrixChol(n), 'colchol': PdMatrixChol(n)},
-                         size=n, valuedomain=RealMatrix(n, n), ref_rand=ref_rand_chol)
+            pymc3_random(
+                pm.MatrixNormal,
+                {
+                    "mu": RealMatrix(n, n),
+                    "rowchol": PdMatrixChol(n),
+                    "colchol": PdMatrixChol(n),
+                },
+                size=n,
+                valuedomain=RealMatrix(n, n),
+                ref_rand=ref_rand_chol,
+            )
             # pymc3_random(
             #     pm.MvNormal,
             #     {'mu': RealMatrix(n, n), 'rowchol': PdMatrixCholUpper(n), 'colchol': PdMatrixCholUpper(n)},
@@ -641,7 +787,7 @@ def ref_rand_uchol(size, mu, rowchol, colchol):
     def test_kronecker_normal(self):
         def ref_rand(size, mu, covs, sigma):
             cov = pm.math.kronecker(covs[0], covs[1]).eval()
-            cov += sigma**2 * np.identity(cov.shape[0])
+            cov += sigma ** 2 * np.identity(cov.shape[0])
             return st.multivariate_normal.rvs(mean=mu, cov=cov, size=size)
 
         def ref_rand_chol(size, mu, chols, sigma):
@@ -657,98 +803,135 @@ def ref_rand_evd(size, mu, evds, sigma):
         sizes = [2, 3]
         sigmas = [0, 1]
         for n, sigma in zip(sizes, sigmas):
-            N = n**2
+            N = n ** 2
             covs = [RandomPdMatrix(n), RandomPdMatrix(n)]
             chols = list(map(np.linalg.cholesky, covs))
             evds = list(map(np.linalg.eigh, covs))
-            dom = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N)
-            mu = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N)
+            dom = Domain([np.random.randn(N) * 0.1], edges=(None, None), shape=N)
+            mu = Domain([np.random.randn(N) * 0.1], edges=(None, None), shape=N)
 
-            std_args = {'mu': mu}
-            cov_args = {'covs': covs}
-            chol_args = {'chols': chols}
-            evd_args = {'evds': evds}
+            std_args = {"mu": mu}
+            cov_args = {"covs": covs}
+            chol_args = {"chols": chols}
+            evd_args = {"evds": evds}
             if sigma is not None and sigma != 0:
-                std_args['sigma'] = Domain([sigma], edges=(None, None))
+                std_args["sigma"] = Domain([sigma], edges=(None, None))
             else:
                 for args in [cov_args, chol_args, evd_args]:
-                    args['sigma'] = sigma
+                    args["sigma"] = sigma
 
             pymc3_random(
-                 pm.KroneckerNormal, std_args, valuedomain=dom,
-                 ref_rand=ref_rand, extra_args=cov_args, model_args=cov_args)
+                pm.KroneckerNormal,
+                std_args,
+                valuedomain=dom,
+                ref_rand=ref_rand,
+                extra_args=cov_args,
+                model_args=cov_args,
+            )
             pymc3_random(
-                 pm.KroneckerNormal, std_args, valuedomain=dom,
-                 ref_rand=ref_rand_chol, extra_args=chol_args,
-                 model_args=chol_args)
+                pm.KroneckerNormal,
+                std_args,
+                valuedomain=dom,
+                ref_rand=ref_rand_chol,
+                extra_args=chol_args,
+                model_args=chol_args,
+            )
             pymc3_random(
-                 pm.KroneckerNormal, std_args, valuedomain=dom,
-                 ref_rand=ref_rand_evd, extra_args=evd_args,
-                 model_args=evd_args)
+                pm.KroneckerNormal,
+                std_args,
+                valuedomain=dom,
+                ref_rand=ref_rand_evd,
+                extra_args=evd_args,
+                model_args=evd_args,
+            )
 
     def test_mv_t(self):
         def ref_rand(size, nu, Sigma, mu):
             normal = st.multivariate_normal.rvs(cov=Sigma, size=size).T
             chi2 = st.chi2.rvs(df=nu, size=size)
             return mu + np.sqrt(nu) * (normal / chi2).T
+
         for n in [2, 3]:
-            pymc3_random(pm.MvStudentT,
-                         {'nu': Domain([5, 10, 25, 50]), 'Sigma': PdMatrix(
-                             n), 'mu': Vector(R, n)},
-                         size=100, valuedomain=Vector(R, n), ref_rand=ref_rand)
+            pymc3_random(
+                pm.MvStudentT,
+                {
+                    "nu": Domain([5, 10, 25, 50]),
+                    "Sigma": PdMatrix(n),
+                    "mu": Vector(R, n),
+                },
+                size=100,
+                valuedomain=Vector(R, n),
+                ref_rand=ref_rand,
+            )
 
     def test_dirichlet(self):
         def ref_rand(size, a):
             return st.dirichlet.rvs(a, size=size)
+
         for n in [2, 3]:
-            pymc3_random(pm.Dirichlet, {'a': Vector(Rplus, n)},
-                         valuedomain=Simplex(n), size=100, ref_rand=ref_rand)
+            pymc3_random(
+                pm.Dirichlet,
+                {"a": Vector(Rplus, n)},
+                valuedomain=Simplex(n),
+                size=100,
+                ref_rand=ref_rand,
+            )
 
     def test_multinomial(self):
         def ref_rand(size, p, n):
             return nr.multinomial(pvals=p, n=n, size=size)
+
         for n in [2, 3]:
-            pymc3_random_discrete(pm.Multinomial, {'p': Simplex(n), 'n': Nat},
-                                  valuedomain=Vector(Nat, n), size=100, ref_rand=ref_rand)
+            pymc3_random_discrete(
+                pm.Multinomial,
+                {"p": Simplex(n), "n": Nat},
+                valuedomain=Vector(Nat, n),
+                size=100,
+                ref_rand=ref_rand,
+            )
 
     def test_gumbel(self):
         def ref_rand(size, mu, beta):
             return st.gumbel_r.rvs(loc=mu, scale=beta, size=size)
-        pymc3_random(pm.Gumbel, {'mu': R, 'beta': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(pm.Gumbel, {"mu": R, "beta": Rplus}, ref_rand=ref_rand)
 
     def test_logistic(self):
         def ref_rand(size, mu, s):
             return st.logistic.rvs(loc=mu, scale=s, size=size)
-        pymc3_random(pm.Logistic, {'mu': R, 's': Rplus}, ref_rand=ref_rand)
+
+        pymc3_random(pm.Logistic, {"mu": R, "s": Rplus}, ref_rand=ref_rand)
 
     def test_logitnormal(self):
         def ref_rand(size, mu, sd):
             return expit(st.norm.rvs(loc=mu, scale=sd, size=size))
-        pymc3_random(pm.LogitNormal, {'mu': R, 'sd': Rplus}, ref_rand=ref_rand)
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+        pymc3_random(pm.LogitNormal, {"mu": R, "sd": Rplus}, ref_rand=ref_rand)
+
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_interpolated(self):
         for mu in R.vals:
             for sd in Rplus.vals:
-                #pylint: disable=cell-var-from-loop
+                # pylint: disable=cell-var-from-loop
                 def ref_rand(size):
                     return st.norm.rvs(loc=mu, scale=sd, size=size)
 
-                class TestedInterpolated (pm.Interpolated):
-
+                class TestedInterpolated(pm.Interpolated):
                     def __init__(self, **kwargs):
                         x_points = np.linspace(mu - 5 * sd, mu + 5 * sd, 100)
                         pdf_points = st.norm.pdf(x_points, loc=mu, scale=sd)
                         super(TestedInterpolated, self).__init__(
-                            x_points=x_points,
-                            pdf_points=pdf_points,
-                            **kwargs
+                            x_points=x_points, pdf_points=pdf_points, **kwargs
                         )
 
                 pymc3_random(TestedInterpolated, {}, ref_rand=ref_rand)
 
-    @pytest.mark.skip('Wishart random sampling not implemented.\n'
-                      'See https://github.com/pymc-devs/pymc3/issues/538')
+    @pytest.mark.skip(
+        "Wishart random sampling not implemented.\n"
+        "See https://github.com/pymc-devs/pymc3/issues/538"
+    )
     def test_wishart(self):
         # Wishart non current recommended for use:
         # https://github.com/pymc-devs/pymc3/issues/538
@@ -762,88 +945,77 @@ def test_wishart(self):
 
     def test_lkj(self):
         for n in [2, 10, 50]:
-            #pylint: disable=cell-var-from-loop
-            shape = n*(n-1)//2
+            # pylint: disable=cell-var-from-loop
+            shape = n * (n - 1) // 2
 
             def ref_rand(size, eta):
-                beta = eta - 1 + n/2
-                return (st.beta.rvs(size=(size, shape), a=beta, b=beta)-.5)*2
-
-            class TestedLKJCorr (pm.LKJCorr):
+                beta = eta - 1 + n / 2
+                return (st.beta.rvs(size=(size, shape), a=beta, b=beta) - 0.5) * 2
 
+            class TestedLKJCorr(pm.LKJCorr):
                 def __init__(self, **kwargs):
-                    kwargs.pop('shape', None)
-                    super(TestedLKJCorr, self).__init__(
-                            n=n,
-                            **kwargs
-                    )
+                    kwargs.pop("shape", None)
+                    super(TestedLKJCorr, self).__init__(n=n, **kwargs)
 
-            pymc3_random(TestedLKJCorr,
-                     {'eta': Domain([1., 10., 100.])},
-                     size=10000//n,
-                     ref_rand=ref_rand)
+            pymc3_random(
+                TestedLKJCorr,
+                {"eta": Domain([1.0, 10.0, 100.0])},
+                size=10000 // n,
+                ref_rand=ref_rand,
+            )
 
     def test_normalmixture(self):
         def ref_rand(size, w, mu, sd):
             component = np.random.choice(w.size, size=size, p=w)
             return np.random.normal(mu[component], sd[component], size=size)
 
-        pymc3_random(pm.NormalMixture, {'w': Simplex(2),
-                     'mu': Domain([[.05, 2.5], [-5., 1.]], edges=(None, None)),
-                     'sd': Domain([[1, 1], [1.5, 2.]], edges=(None, None))},
-                     extra_args={'comp_shape': 2},
-                     size=1000,
-                     ref_rand=ref_rand)
-        pymc3_random(pm.NormalMixture, {'w': Simplex(3),
-                     'mu': Domain([[-5., 1., 2.5]], edges=(None, None)),
-                     'sd': Domain([[1.5, 2., 3.]], edges=(None, None))},
-                     extra_args={'comp_shape': 3},
-                     size=1000,
-                     ref_rand=ref_rand)
+        pymc3_random(
+            pm.NormalMixture,
+            {
+                "w": Simplex(2),
+                "mu": Domain([[0.05, 2.5], [-5.0, 1.0]], edges=(None, None)),
+                "sd": Domain([[1, 1], [1.5, 2.0]], edges=(None, None)),
+            },
+            extra_args={"comp_shape": 2},
+            size=1000,
+            ref_rand=ref_rand,
+        )
+        pymc3_random(
+            pm.NormalMixture,
+            {
+                "w": Simplex(3),
+                "mu": Domain([[-5.0, 1.0, 2.5]], edges=(None, None)),
+                "sd": Domain([[1.5, 2.0, 3.0]], edges=(None, None)),
+            },
+            extra_args={"comp_shape": 3},
+            size=1000,
+            ref_rand=ref_rand,
+        )
 
 
 def test_mixture_random_shape():
     # test the shape broadcasting in mixture random
-    y = np.concatenate([nr.poisson(5, size=10),
-                        nr.poisson(9, size=10)])
+    y = np.concatenate([nr.poisson(5, size=10), nr.poisson(9, size=10)])
     with pm.Model() as m:
         comp0 = pm.Poisson.dist(mu=np.ones(2))
-        w0 = pm.Dirichlet('w0', a=np.ones(2))
-        like0 = pm.Mixture('like0',
-                           w=w0,
-                           comp_dists=comp0,
-                           observed=y)
-
-        comp1 = pm.Poisson.dist(mu=np.ones((20, 2)),
-                                shape=(20, 2))
-        w1 = pm.Dirichlet('w1', a=np.ones(2))
-        like1 = pm.Mixture('like1',
-                           w=w1,
-                           comp_dists=comp1,
-                           observed=y)
+        w0 = pm.Dirichlet("w0", a=np.ones(2))
+        like0 = pm.Mixture("like0", w=w0, comp_dists=comp0, observed=y)
+
+        comp1 = pm.Poisson.dist(mu=np.ones((20, 2)), shape=(20, 2))
+        w1 = pm.Dirichlet("w1", a=np.ones(2))
+        like1 = pm.Mixture("like1", w=w1, comp_dists=comp1, observed=y)
 
         comp2 = pm.Poisson.dist(mu=np.ones(2))
-        w2 = pm.Dirichlet('w2',
-                          a=np.ones(2),
-                          shape=(20, 2))
-        like2 = pm.Mixture('like2',
-                           w=w2,
-                           comp_dists=comp2,
-                           observed=y)
-
-        comp3 = pm.Poisson.dist(mu=np.ones(2),
-                                shape=(20, 2))
-        w3 = pm.Dirichlet('w3',
-                          a=np.ones(2),
-                          shape=(20, 2))
-        like3 = pm.Mixture('like3',
-                           w=w3,
-                           comp_dists=comp3,
-                           observed=y)
-
-    rand0, rand1, rand2, rand3 = draw_values([like0, like1, like2, like3],
-                                             point=m.test_point,
-                                             size=100)
+        w2 = pm.Dirichlet("w2", a=np.ones(2), shape=(20, 2))
+        like2 = pm.Mixture("like2", w=w2, comp_dists=comp2, observed=y)
+
+        comp3 = pm.Poisson.dist(mu=np.ones(2), shape=(20, 2))
+        w3 = pm.Dirichlet("w3", a=np.ones(2), shape=(20, 2))
+        like3 = pm.Mixture("like3", w=w3, comp_dists=comp3, observed=y)
+
+    rand0, rand1, rand2, rand3 = draw_values(
+        [like0, like1, like2, like3], point=m.test_point, size=100
+    )
     assert rand0.shape == (100, 20)
     assert rand1.shape == (100, 20)
     assert rand2.shape == (100, 20)
@@ -851,29 +1023,34 @@ def test_mixture_random_shape():
 
     with m:
         ppc = pm.sample_posterior_predictive([m.test_point], samples=200)
-    assert ppc['like0'].shape == (200, 20)
-    assert ppc['like1'].shape == (200, 20)
-    assert ppc['like2'].shape == (200, 20)
-    assert ppc['like3'].shape == (200, 20)
+    assert ppc["like0"].shape == (200, 20)
+    assert ppc["like1"].shape == (200, 20)
+    assert ppc["like2"].shape == (200, 20)
+    assert ppc["like3"].shape == (200, 20)
 
 
 def test_density_dist_with_random_sampleable():
     with pm.Model() as model:
-        mu = pm.Normal('mu', 0, 1)
+        mu = pm.Normal("mu", 0, 1)
         normal_dist = pm.Normal.dist(mu, 1)
-        pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100), random=normal_dist.random)
+        pm.DensityDist(
+            "density_dist",
+            normal_dist.logp,
+            observed=np.random.randn(100),
+            random=normal_dist.random,
+        )
         trace = pm.sample(100)
 
     samples = 500
     ppc = pm.sample_posterior_predictive(trace, samples=samples, model=model, size=100)
-    assert len(ppc['density_dist']) == samples
+    assert len(ppc["density_dist"]) == samples
 
 
 def test_density_dist_without_random_not_sampleable():
     with pm.Model() as model:
-        mu = pm.Normal('mu', 0, 1)
+        mu = pm.Normal("mu", 0, 1)
         normal_dist = pm.Normal.dist(mu, 1)
-        pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100))
+        pm.DensityDist("density_dist", normal_dist.logp, observed=np.random.randn(100))
         trace = pm.sample(100)
 
     samples = 500
diff --git a/pymc3/tests/test_distributions_timeseries.py b/pymc3/tests/test_distributions_timeseries.py
index fe083ddff4..e35a6ccc53 100644
--- a/pymc3/tests/test_distributions_timeseries.py
+++ b/pymc3/tests/test_distributions_timeseries.py
@@ -8,40 +8,42 @@
 
 import numpy as np
 
+
 def test_AR():
     # AR1
-    data = np.array([0.3,1,2,3,4])
+    data = np.array([0.3, 1, 2, 3, 4])
     phi = np.array([0.99])
     with Model() as t:
-        y = AR('y', phi, sd=1, shape=len(data))
-        z = Normal('z', mu=phi*data[:-1], sd=1, shape=len(data)-1)
-    ar_like = t['y'].logp({'z':data[1:], 'y': data})
-    reg_like = t['z'].logp({'z':data[1:], 'y': data})
+        y = AR("y", phi, sd=1, shape=len(data))
+        z = Normal("z", mu=phi * data[:-1], sd=1, shape=len(data) - 1)
+    ar_like = t["y"].logp({"z": data[1:], "y": data})
+    reg_like = t["z"].logp({"z": data[1:], "y": data})
     np.testing.assert_allclose(ar_like, reg_like)
 
     # AR1 and AR(1)
     with Model() as t:
-        rho = Normal('rho', 0., 1.)
-        y1 = AR1('y1', rho, 1., observed=data)
-        y2 = AR('y2', rho, 1., init=Normal.dist(0, 1), observed=data)
-    np.testing.assert_allclose(y1.logp(t.test_point),
-                               y2.logp(t.test_point))
+        rho = Normal("rho", 0.0, 1.0)
+        y1 = AR1("y1", rho, 1.0, observed=data)
+        y2 = AR("y2", rho, 1.0, init=Normal.dist(0, 1), observed=data)
+    np.testing.assert_allclose(y1.logp(t.test_point), y2.logp(t.test_point))
 
     # AR1 + constant
     with Model() as t:
-        y = AR('y', [0.3, phi], sd=1, shape=len(data), constant=True)
-        z = Normal('z', mu=0.3 + phi*data[:-1], sd=1, shape=len(data)-1)
-    ar_like = t['y'].logp({'z':data[1:], 'y': data})
-    reg_like = t['z'].logp({'z':data[1:], 'y': data})
+        y = AR("y", [0.3, phi], sd=1, shape=len(data), constant=True)
+        z = Normal("z", mu=0.3 + phi * data[:-1], sd=1, shape=len(data) - 1)
+    ar_like = t["y"].logp({"z": data[1:], "y": data})
+    reg_like = t["z"].logp({"z": data[1:], "y": data})
     np.testing.assert_allclose(ar_like, reg_like)
 
     # AR2
     phi = np.array([0.84, 0.10])
     with Model() as t:
-        y = AR('y', phi, sd=1, shape=len(data))
-        z = Normal('z', mu=phi[0]*data[1:-1]+phi[1]*data[:-2], sd=1, shape=len(data)-2)
-    ar_like = t['y'].logp({'z':data[2:], 'y': data})
-    reg_like = t['z'].logp({'z':data[2:], 'y': data})
+        y = AR("y", phi, sd=1, shape=len(data))
+        z = Normal(
+            "z", mu=phi[0] * data[1:-1] + phi[1] * data[:-2], sd=1, shape=len(data) - 2
+        )
+    ar_like = t["y"].logp({"z": data[2:], "y": data})
+    reg_like = t["z"].logp({"z": data[2:], "y": data})
     np.testing.assert_allclose(ar_like, reg_like)
 
 
@@ -51,57 +53,73 @@ def test_AR_nd():
     beta_tp = np.random.randn(p, n)
     y_tp = np.random.randn(T, n)
     with Model() as t0:
-        beta = Normal('beta', 0., 1.,
-                      shape=(p, n),
-                      testval=beta_tp)
-        AR('y', beta, sd=1.0,
-           shape=(T, n), testval=y_tp)
+        beta = Normal("beta", 0.0, 1.0, shape=(p, n), testval=beta_tp)
+        AR("y", beta, sd=1.0, shape=(T, n), testval=y_tp)
 
     with Model() as t1:
-        beta = Normal('beta', 0., 1.,
-                      shape=(p, n),
-                      testval=beta_tp)
+        beta = Normal("beta", 0.0, 1.0, shape=(p, n), testval=beta_tp)
         for i in range(n):
-            AR('y_%d' % i, beta[:, i], sd=1.0,
-               shape=T, testval=y_tp[:, i])
+            AR("y_%d" % i, beta[:, i], sd=1.0, shape=T, testval=y_tp[:, i])
 
-    np.testing.assert_allclose(t0.logp(t0.test_point),
-                               t1.logp(t1.test_point))
+    np.testing.assert_allclose(t0.logp(t0.test_point), t1.logp(t1.test_point))
 
 
 def test_GARCH11():
     # test data ~ N(0, 1)
-    data = np.array([-1.35078362, -0.81254164,  0.28918551, -2.87043544, -0.94353337,
-                     0.83660719, -0.23336562, -0.58586298, -1.36856736, -1.60832975,
-                     -1.31403141,  0.05446936, -0.97213128, -0.18928725,  1.62011258,
-                     -0.95978616, -2.06536047,  0.6556103 , -0.27816645, -1.26413397])
+    data = np.array(
+        [
+            -1.35078362,
+            -0.81254164,
+            0.28918551,
+            -2.87043544,
+            -0.94353337,
+            0.83660719,
+            -0.23336562,
+            -0.58586298,
+            -1.36856736,
+            -1.60832975,
+            -1.31403141,
+            0.05446936,
+            -0.97213128,
+            -0.18928725,
+            1.62011258,
+            -0.95978616,
+            -2.06536047,
+            0.6556103,
+            -0.27816645,
+            -1.26413397,
+        ]
+    )
     omega = 0.6
     alpha_1 = 0.4
     beta_1 = 0.5
     initial_vol = np.float64(0.9)
     vol = np.empty_like(data)
     vol[0] = initial_vol
-    for i in range(len(data)-1):
-        vol[i+1] = np.sqrt(omega + beta_1*vol[i]**2 + alpha_1*data[i]**2)
+    for i in range(len(data) - 1):
+        vol[i + 1] = np.sqrt(omega + beta_1 * vol[i] ** 2 + alpha_1 * data[i] ** 2)
 
     with Model() as t:
-        y = GARCH11('y', omega=omega, alpha_1=alpha_1, beta_1=beta_1,
-                    initial_vol=initial_vol, shape=data.shape)
-        z = Normal('z', mu=0, sd=vol, shape=data.shape)
-    garch_like = t['y'].logp({'z':data, 'y': data})
-    reg_like = t['z'].logp({'z':data, 'y': data})
+        y = GARCH11(
+            "y",
+            omega=omega,
+            alpha_1=alpha_1,
+            beta_1=beta_1,
+            initial_vol=initial_vol,
+            shape=data.shape,
+        )
+        z = Normal("z", mu=0, sd=vol, shape=data.shape)
+    garch_like = t["y"].logp({"z": data, "y": data})
+    reg_like = t["z"].logp({"z": data, "y": data})
     np.testing.assert_allclose(garch_like, reg_like)
 
 
-
 def _gen_sde_path(sde, pars, dt, n, x0):
     xs = [x0]
     wt = np.random.normal(size=(n,) if isinstance(x0, float) else (n, x0.size))
     for i in range(n):
         f, g = sde(xs[-1], *pars)
-        xs.append(
-            xs[-1] + f * dt + np.sqrt(dt) * g * wt[i]
-        )
+        xs.append(xs[-1] + f * dt + np.sqrt(dt) * g * wt[i])
     return np.array(xs)
 
 
@@ -115,17 +133,17 @@ def test_linear():
     z = x + np.random.randn(x.size) * sig2
     # build model
     with Model() as model:
-        lamh = Flat('lamh')
-        xh = EulerMaruyama('xh', dt, sde, (lamh,), shape=N + 1, testval=x)
-        Normal('zh', mu=xh, sd=sig2, observed=z)
+        lamh = Flat("lamh")
+        xh = EulerMaruyama("xh", dt, sde, (lamh,), shape=N + 1, testval=x)
+        Normal("zh", mu=xh, sd=sig2, observed=z)
     # invert
     with model:
-        trace = sample(init='advi+adapt_diag', chains=1)
+        trace = sample(init="advi+adapt_diag", chains=1)
 
     ppc = sample_posterior_predictive(trace, model=model)
     # test
     p95 = [2.5, 97.5]
     lo, hi = np.percentile(trace[lamh], p95, axis=0)
     assert (lo < lam) and (lam < hi)
-    lo, hi = np.percentile(ppc['zh'], p95, axis=0)
+    lo, hi = np.percentile(ppc["zh"], p95, axis=0)
     assert ((lo < z) * (z < hi)).mean() > 0.95
diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py
index 4a0f0d3b91..9d0c757186 100644
--- a/pymc3/tests/test_examples.py
+++ b/pymc3/tests/test_examples.py
@@ -9,43 +9,46 @@
 
 from .helpers import SeededTest
 
-matplotlib.use('Agg', warn=False)
+matplotlib.use("Agg", warn=False)
 
 
 def get_city_data():
     """Helper to get city data"""
-    data = pd.read_csv(pm.get_data('srrs2.dat'))
-    cty_data = pd.read_csv(pm.get_data('cty.dat'))
+    data = pd.read_csv(pm.get_data("srrs2.dat"))
+    cty_data = pd.read_csv(pm.get_data("cty.dat"))
 
-    data = data[data.state == 'MN']
+    data = data[data.state == "MN"]
 
-    data['fips'] = data.stfips * 1000 + data.cntyfips
-    cty_data['fips'] = cty_data.stfips * 1000 + cty_data.ctfips
-    data['lradon'] = np.log(np.where(data.activity == 0, .1, data.activity))
-    data = data.merge(cty_data, 'inner', on='fips')
+    data["fips"] = data.stfips * 1000 + data.cntyfips
+    cty_data["fips"] = cty_data.stfips * 1000 + cty_data.ctfips
+    data["lradon"] = np.log(np.where(data.activity == 0, 0.1, data.activity))
+    data = data.merge(cty_data, "inner", on="fips")
 
-    unique = data[['fips']].drop_duplicates()
-    unique['group'] = np.arange(len(unique))
-    unique.set_index('fips')
-    return data.merge(unique, 'inner', on='fips')
+    unique = data[["fips"]].drop_duplicates()
+    unique["group"] = np.arange(len(unique))
+    unique.set_index("fips")
+    return data.merge(unique, "inner", on="fips")
 
 
 class TestARM5_4(SeededTest):
     def build_model(self):
-        data = pd.read_csv(pm.get_data('wells.dat'),
-                           delimiter=u' ', index_col=u'id',
-                           dtype={u'switch': np.int8})
+        data = pd.read_csv(
+            pm.get_data("wells.dat"),
+            delimiter=u" ",
+            index_col=u"id",
+            dtype={u"switch": np.int8},
+        )
         data.dist /= 100
         data.educ /= 4
         col = data.columns
         P = data[col[1:]]
         P -= P.mean()
-        P['1'] = 1
+        P["1"] = 1
 
         with pm.Model() as model:
-            effects = pm.Normal('effects', mu=0, sd=100, shape=len(P.columns))
+            effects = pm.Normal("effects", mu=0, sd=100, shape=len(P.columns))
             logit_p = tt.dot(floatX(np.array(P)), effects)
-            pm.Bernoulli('s', logit_p=logit_p, observed=floatX(data.switch.values))
+            pm.Bernoulli("s", logit_p=logit_p, observed=floatX(data.switch.values))
         return model
 
     def test_run(self):
@@ -58,32 +61,37 @@ class TestARM12_6(SeededTest):
     def build_model(self):
         data = get_city_data()
 
-        self.obs_means = data.groupby('fips').lradon.mean().as_matrix()
+        self.obs_means = data.groupby("fips").lradon.mean().as_matrix()
 
         lradon = data.lradon.as_matrix()
         floor = data.floor.as_matrix()
         group = data.group.as_matrix()
 
         with pm.Model() as model:
-            groupmean = pm.Normal('groupmean', 0, 10. ** -2.)
-            groupsd = pm.Uniform('groupsd', 0, 10.)
-            sd = pm.Uniform('sd', 0, 10.)
-            floor_m = pm.Normal('floor_m', 0, 5. ** -2.)
-            means = pm.Normal('means', groupmean, groupsd ** -2., shape=len(self.obs_means))
-            pm.Normal('lr', floor * floor_m + means[group], sd ** -2., observed=lradon)
+            groupmean = pm.Normal("groupmean", 0, 10.0 ** -2.0)
+            groupsd = pm.Uniform("groupsd", 0, 10.0)
+            sd = pm.Uniform("sd", 0, 10.0)
+            floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0)
+            means = pm.Normal(
+                "means", groupmean, groupsd ** -2.0, shape=len(self.obs_means)
+            )
+            pm.Normal("lr", floor * floor_m + means[group], sd ** -2.0, observed=lradon)
         return model
 
     def too_slow(self):
         model = self.build_model()
-        start = {'groupmean': self.obs_means.mean(),
-                 'groupsd_interval__': 0,
-                 'sd_interval__': 0,
-                 'means': self.obs_means,
-                 'floor_m': 0.,
-                 }
+        start = {
+            "groupmean": self.obs_means.mean(),
+            "groupsd_interval__": 0,
+            "sd_interval__": 0,
+            "means": self.obs_means,
+            "floor_m": 0.0,
+        }
         with model:
-            start = pm.find_MAP(start=start,
-                                vars=[model['groupmean'], model['sd_interval__'], model['floor_m']])
+            start = pm.find_MAP(
+                start=start,
+                vars=[model["groupmean"], model["sd_interval__"], model["floor_m"]],
+            )
             step = pm.NUTS(model.vars, scaling=start)
             pm.sample(50, step=step, start=start)
 
@@ -91,7 +99,7 @@ def too_slow(self):
 class TestARM12_6Uranium(SeededTest):
     def build_model(self):
         data = get_city_data()
-        self.obs_means = data.groupby('fips').lradon.mean()
+        self.obs_means = data.groupby("fips").lradon.mean()
 
         lradon = data.lradon.as_matrix()
         floor = data.floor.as_matrix()
@@ -99,27 +107,35 @@ def build_model(self):
         ufull = data.Uppm.as_matrix()
 
         with pm.Model() as model:
-            groupmean = pm.Normal('groupmean', 0, 10. ** -2.)
-            groupsd = pm.Uniform('groupsd', 0, 10.)
-            sd = pm.Uniform('sd', 0, 10.)
-            floor_m = pm.Normal('floor_m', 0, 5. ** -2.)
-            u_m = pm.Normal('u_m', 0, 5. ** -2)
-            means = pm.Normal('means', groupmean, groupsd ** -2., shape=len(self.obs_means))
-            pm.Normal('lr', floor * floor_m + means[group] + ufull * u_m, sd ** - 2.,
-                      observed=lradon)
+            groupmean = pm.Normal("groupmean", 0, 10.0 ** -2.0)
+            groupsd = pm.Uniform("groupsd", 0, 10.0)
+            sd = pm.Uniform("sd", 0, 10.0)
+            floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0)
+            u_m = pm.Normal("u_m", 0, 5.0 ** -2)
+            means = pm.Normal(
+                "means", groupmean, groupsd ** -2.0, shape=len(self.obs_means)
+            )
+            pm.Normal(
+                "lr",
+                floor * floor_m + means[group] + ufull * u_m,
+                sd ** -2.0,
+                observed=lradon,
+            )
         return model
 
     def too_slow(self):
         model = self.build_model()
         with model:
-            start = pm.Point({
-                'groupmean': self.obs_means.mean(),
-                'groupsd_interval__': 0,
-                'sd_interval__': 0,
-                'means': np.array(self.obs_means),
-                'u_m': np.array([.72]),
-                'floor_m': 0.,
-            })
+            start = pm.Point(
+                {
+                    "groupmean": self.obs_means.mean(),
+                    "groupsd_interval__": 0,
+                    "sd_interval__": 0,
+                    "means": np.array(self.obs_means),
+                    "u_m": np.array([0.72]),
+                    "floor_m": 0.0,
+                }
+            )
 
             start = pm.find_MAP(start, model.vars[:-1])
             H = model.fastd2logp()
@@ -130,13 +146,121 @@ def too_slow(self):
 
 
 def build_disaster_model(masked=False):
-    disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
-                               3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
-                               2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
-                               1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
-                               0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
-                               3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
-                               0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
+    disasters_data = np.array(
+        [
+            4,
+            5,
+            4,
+            0,
+            1,
+            4,
+            3,
+            4,
+            0,
+            6,
+            3,
+            3,
+            4,
+            0,
+            2,
+            6,
+            3,
+            3,
+            5,
+            4,
+            5,
+            3,
+            1,
+            4,
+            4,
+            1,
+            5,
+            5,
+            3,
+            4,
+            2,
+            5,
+            2,
+            2,
+            3,
+            4,
+            2,
+            1,
+            3,
+            2,
+            2,
+            1,
+            1,
+            1,
+            1,
+            3,
+            0,
+            0,
+            1,
+            0,
+            1,
+            1,
+            0,
+            0,
+            3,
+            1,
+            0,
+            3,
+            2,
+            2,
+            0,
+            1,
+            1,
+            1,
+            0,
+            1,
+            0,
+            1,
+            0,
+            0,
+            0,
+            2,
+            1,
+            0,
+            0,
+            0,
+            1,
+            1,
+            0,
+            2,
+            3,
+            3,
+            1,
+            1,
+            2,
+            1,
+            1,
+            1,
+            1,
+            2,
+            4,
+            2,
+            0,
+            0,
+            1,
+            4,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+            0,
+            0,
+            0,
+            1,
+            0,
+            0,
+            1,
+            0,
+            1,
+        ]
+    )
     if masked:
         disasters_data[[23, 68]] = -1
         disasters_data = np.ma.masked_values(disasters_data, value=-1)
@@ -144,27 +268,29 @@ def build_disaster_model(masked=False):
 
     with pm.Model() as model:
         # Prior for distribution of switchpoint location
-        switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years)
+        switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
         # Priors for pre- and post-switch mean number of disasters
-        early_mean = pm.Exponential('early_mean', lam=1.)
-        late_mean = pm.Exponential('late_mean', lam=1.)
+        early_mean = pm.Exponential("early_mean", lam=1.0)
+        late_mean = pm.Exponential("late_mean", lam=1.0)
         # Allocate appropriate Poisson rates to years before and after current
         # switchpoint location
         idx = np.arange(years)
         rate = tt.switch(switchpoint >= idx, early_mean, late_mean)
         # Data likelihood
-        pm.Poisson('disasters', rate, observed=disasters_data)
+        pm.Poisson("disasters", rate, observed=disasters_data)
     return model
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestDisasterModel(SeededTest):
     # Time series of recorded coal mining disasters in the UK from 1851 to 1962
     def test_disaster_model(self):
         model = build_disaster_model(masked=False)
         with model:
             # Initial values for stochastic nodes
-            start = {'early_mean': 2., 'late_mean': 3.}
+            start = {"early_mean": 2.0, "late_mean": 3.0}
             # Use slice sampler for means (other varibles auto-selected)
             step = pm.Slice([model.early_mean_log__, model.late_mean_log__])
             tr = pm.sample(500, tune=50, start=start, step=step, chains=2)
@@ -174,7 +300,7 @@ def test_disaster_model_missing(self):
         model = build_disaster_model(masked=True)
         with model:
             # Initial values for stochastic nodes
-            start = {'early_mean': 2., 'late_mean': 3.}
+            start = {"early_mean": 2.0, "late_mean": 3.0}
             # Use slice sampler for means (other varibles auto-selected)
             step = pm.Slice([model.early_mean_log__, model.late_mean_log__])
             tr = pm.sample(500, tune=50, start=start, step=step, chains=2)
@@ -187,10 +313,14 @@ def build_model(self):
         true_intercept = 1
         true_slope = 2
         self.x = np.linspace(0, 1, size)
-        self.y = true_intercept + self.x * true_slope + np.random.normal(scale=.5, size=size)
+        self.y = (
+            true_intercept
+            + self.x * true_slope
+            + np.random.normal(scale=0.5, size=size)
+        )
         data = dict(x=self.x, y=self.y)
         with pm.Model() as model:
-            pm.GLM.from_formula('y ~ x', data)
+            pm.GLM.from_formula("y ~ x", data)
         return model
 
     def test_run(self):
@@ -231,6 +361,7 @@ class TestLatentOccupancy(SeededTest):
     Created by Chris Fonnesbeck on 2008-07-28.
     Copyright (c) 2008 University of Otago. All rights reserved.
     """
+
     def setup_method(self):
         super(TestLatentOccupancy, self).setup_method()
         # Sample size
@@ -240,36 +371,41 @@ def setup_method(self):
         # True occupancy
         pi = 0.4
         # Simulate some data data
-        self.y = ((np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n)).astype('int16')
+        self.y = (
+            (np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n)
+        ).astype("int16")
 
     def build_model(self):
         with pm.Model() as model:
             # Estimated occupancy
-            psi = pm.Beta('psi', 1, 1)
+            psi = pm.Beta("psi", 1, 1)
             # Latent variable for occupancy
-            pm.Bernoulli('z', psi, shape=self.y.shape)
+            pm.Bernoulli("z", psi, shape=self.y.shape)
             # Estimated mean count
-            theta = pm.Uniform('theta', 0, 100)
+            theta = pm.Uniform("theta", 0, 100)
             # Poisson likelihood
-            pm.ZeroInflatedPoisson('y', theta, psi, observed=self.y)
+            pm.ZeroInflatedPoisson("y", theta, psi, observed=self.y)
         return model
 
     def test_run(self):
         model = self.build_model()
         with model:
             start = {
-                'psi': np.array(0.5, dtype='f'),
-                'z': (self.y > 0).astype('int16'),
-                'theta': np.array(5, dtype='f'),
+                "psi": np.array(0.5, dtype="f"),
+                "z": (self.y > 0).astype("int16"),
+                "theta": np.array(5, dtype="f"),
             }
             step_one = pm.Metropolis([model.theta_interval__, model.psi_logodds__])
             step_two = pm.BinaryMetropolis([model.z])
             pm.sample(50, step=[step_one, step_two], start=start, chains=1)
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to starting inf at starting logP")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"),
+    reason="Fails on float32 due to starting inf at starting logP",
+)
 class TestRSV(SeededTest):
-    '''
+    """
     This model estimates the population prevalence of respiratory syncytial virus
     (RSV) among children in Amman, Jordan, based on 3 years of admissions diagnosed
     with RSV to Al Bashir hospital.
@@ -280,7 +416,8 @@ class TestRSV(SeededTest):
     1-year-olds) for the proportion of the population in the city, as well as for
     the market share of the hospital. The latter is based on expert esimate, and
     hence encoded as a prior.
-    '''
+    """
+
     def build_model(self):
         # 1-year-old children in Jordan
         kids = np.array([180489, 191817, 190830])
@@ -290,15 +427,15 @@ def build_model(self):
         rsv_cases = np.array([40, 59, 65])
         with pm.Model() as model:
             # Al Bashir hospital market share
-            market_share = pm.Uniform('market_share', 0.5, 0.6)
+            market_share = pm.Uniform("market_share", 0.5, 0.6)
             # Number of 1 y.o. in Amman
-            n_amman = pm.Binomial('n_amman', kids, amman_prop, shape=3)
+            n_amman = pm.Binomial("n_amman", kids, amman_prop, shape=3)
             # Prior probability
-            prev_rsv = pm.Beta('prev_rsv', 1, 5, shape=3)
+            prev_rsv = pm.Beta("prev_rsv", 1, 5, shape=3)
             # RSV in Amman
-            y_amman = pm.Binomial('y_amman', n_amman, prev_rsv, shape=3, testval=100)
+            y_amman = pm.Binomial("y_amman", n_amman, prev_rsv, shape=3, testval=100)
             # Likelihood for number with RSV in hospital (assumes Pr(hosp | RSV) = 1)
-            pm.Binomial('y_hosp', y_amman, market_share, observed=rsv_cases)
+            pm.Binomial("y_hosp", y_amman, market_share, observed=rsv_cases)
         return model
 
     def test_run(self):
diff --git a/pymc3/tests/test_glm.py b/pymc3/tests/test_glm.py
index aa8844141f..979b593a54 100644
--- a/pymc3/tests/test_glm.py
+++ b/pymc3/tests/test_glm.py
@@ -19,7 +19,7 @@ def setup_class(cls):
         super(TestGLM, cls).setup_class()
         cls.intercept = 1
         cls.slope = 3
-        cls.sd = .05
+        cls.sd = 0.05
         x_linear, cls.y_linear = generate_data(cls.intercept, cls.slope, size=1000)
         cls.y_linear += np.random.normal(size=1000, scale=cls.sd)
         cls.data_linear = pd.DataFrame(dict(x=x_linear, y=cls.y_linear))
@@ -35,85 +35,113 @@ def setup_class(cls):
 
     def test_linear_component(self):
         with Model() as model:
-            lm = LinearComponent.from_formula('y ~ x', self.data_linear)
-            sigma = Uniform('sigma', 0, 20)
-            Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear)
+            lm = LinearComponent.from_formula("y ~ x", self.data_linear)
+            sigma = Uniform("sigma", 0, 20)
+            Normal("y_obs", mu=lm.y_est, sd=sigma, observed=self.y_linear)
             start = find_MAP(vars=[sigma])
             step = Slice(model.vars)
-            trace = sample(500, tune=0, step=step, start=start,
-                           progressbar=False, random_seed=self.random_seed)
+            trace = sample(
+                500,
+                tune=0,
+                step=step,
+                start=start,
+                progressbar=False,
+                random_seed=self.random_seed,
+            )
 
-            assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0
-            assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0
-            assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0
+            assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0
+            assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0
+            assert round(abs(np.mean(trace["sigma"]) - self.sd), 1) == 0
 
     def test_glm(self):
         with Model() as model:
-            GLM.from_formula('y ~ x', self.data_linear)
+            GLM.from_formula("y ~ x", self.data_linear)
             step = Slice(model.vars)
-            trace = sample(500, step=step, tune=0, progressbar=False,
-                           random_seed=self.random_seed)
+            trace = sample(
+                500, step=step, tune=0, progressbar=False, random_seed=self.random_seed
+            )
 
-            assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0
-            assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0
-            assert round(abs(np.mean(trace['sd'])-self.sd), 1) == 0
+            assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0
+            assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0
+            assert round(abs(np.mean(trace["sd"]) - self.sd), 1) == 0
 
     def test_glm_offset(self):
-        offset = 1.
+        offset = 1.0
         with Model() as model:
-            GLM.from_formula('y ~ x', self.data_linear, offset=offset)
+            GLM.from_formula("y ~ x", self.data_linear, offset=offset)
             step = Slice(model.vars)
-            trace = sample(500, step=step, tune=0, progressbar=False,
-                           random_seed=self.random_seed)
+            trace = sample(
+                500, step=step, tune=0, progressbar=False, random_seed=self.random_seed
+            )
 
-            assert round(abs(np.mean(trace['Intercept'])-self.intercept+offset), 1) == 0
+            assert (
+                round(abs(np.mean(trace["Intercept"]) - self.intercept + offset), 1)
+                == 0
+            )
 
     def test_glm_link_func(self):
         with Model() as model:
-            GLM.from_formula('y ~ x', self.data_logistic,
-                    family=families.Binomial(link=families.logit))
+            GLM.from_formula(
+                "y ~ x",
+                self.data_logistic,
+                family=families.Binomial(link=families.logit),
+            )
             step = Slice(model.vars)
-            trace = sample(1000, step=step, tune=0, progressbar=False,
-                           random_seed=self.random_seed)
+            trace = sample(
+                1000, step=step, tune=0, progressbar=False, random_seed=self.random_seed
+            )
 
-            assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0
-            assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0
+            assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0
+            assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0
 
     def test_glm_link_func2(self):
         with Model() as model:
-            GLM.from_formula('y ~ x', self.data_logistic2,
-                    family=families.Binomial(priors={'n': self.data_logistic2['n']}))
-            trace = sample(1000, progressbar=False,
-                           random_seed=self.random_seed)
+            GLM.from_formula(
+                "y ~ x",
+                self.data_logistic2,
+                family=families.Binomial(priors={"n": self.data_logistic2["n"]}),
+            )
+            trace = sample(1000, progressbar=False, random_seed=self.random_seed)
 
-            assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0
-            assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0
+            assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0
+            assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0
 
     def test_more_than_one_glm_is_ok(self):
         with Model():
-            GLM.from_formula('y ~ x', self.data_logistic,
-                    family=families.Binomial(link=families.logit),
-                    name='glm1')
-            GLM.from_formula('y ~ x', self.data_logistic,
-                    family=families.Binomial(link=families.logit),
-                    name='glm2')
+            GLM.from_formula(
+                "y ~ x",
+                self.data_logistic,
+                family=families.Binomial(link=families.logit),
+                name="glm1",
+            )
+            GLM.from_formula(
+                "y ~ x",
+                self.data_logistic,
+                family=families.Binomial(link=families.logit),
+                name="glm2",
+            )
 
     def test_from_xy(self):
         with Model():
-            GLM(self.data_logistic['x'],
-                self.data_logistic['y'],
+            GLM(
+                self.data_logistic["x"],
+                self.data_logistic["y"],
                 family=families.Binomial(link=families.logit),
-                name='glm1')
+                name="glm1",
+            )
 
     def test_boolean_y(self):
-        model = GLM.from_formula('y ~ x', pd.DataFrame(
-                {'x': self.data_logistic['x'],
-                 'y': self.data_logistic['y']}
-            )
+        model = GLM.from_formula(
+            "y ~ x",
+            pd.DataFrame({"x": self.data_logistic["x"], "y": self.data_logistic["y"]}),
         )
-        model_bool = GLM.from_formula('y ~ x', pd.DataFrame(
-                {'x': self.data_logistic['x'],
-                 'y': [bool(i) for i in self.data_logistic['y']]}
-            )
+        model_bool = GLM.from_formula(
+            "y ~ x",
+            pd.DataFrame(
+                {
+                    "x": self.data_logistic["x"],
+                    "y": [bool(i) for i in self.data_logistic["y"]],
+                }
+            ),
         )
         assert_equal(model.y.observations, model_bool.y.observations)
diff --git a/pymc3/tests/test_gp.py b/pymc3/tests/test_gp.py
index 9f52068576..6122d73adc 100644
--- a/pymc3/tests/test_gp.py
+++ b/pymc3/tests/test_gp.py
@@ -18,8 +18,8 @@ def test_value(self):
         with pm.Model() as model:
             zero_mean = pm.gp.mean.Zero()
         M = theano.function([], zero_mean(X))()
-        assert np.all(M==0)
-        assert M.shape == (10, )
+        assert np.all(M == 0)
+        assert M.shape == (10,)
 
 
 class TestConstantMean(object):
@@ -28,8 +28,8 @@ def test_value(self):
         with pm.Model() as model:
             const_mean = pm.gp.mean.Constant(6)
         M = theano.function([], const_mean(X))()
-        assert np.all(M==6)
-        assert M.shape == (10, )
+        assert np.all(M == 6)
+        assert M.shape == (10,)
 
 
 class TestLinearMean(object):
@@ -39,7 +39,7 @@ def test_value(self):
             linear_mean = pm.gp.mean.Linear(2, 0.5)
         M = theano.function([], linear_mean(X))()
         npt.assert_allclose(M[1], 0.7222, atol=1e-3)
-        assert M.shape == (10, )
+        assert M.shape == (10,)
 
 
 class TestAddProdMean(object):
@@ -212,8 +212,16 @@ def test_multiops(self):
         X = np.linspace(0, 1, 3)[:, None]
         M = np.array([[1, 2, 3], [2, 1, 2], [3, 2, 1]])
         with pm.Model() as model:
-            cov1 = 3 + pm.gp.cov.ExpQuad(1, 0.1) + M * pm.gp.cov.ExpQuad(1, 0.1) * M * pm.gp.cov.ExpQuad(1, 0.1)
-            cov2 = pm.gp.cov.ExpQuad(1, 0.1) * M * pm.gp.cov.ExpQuad(1, 0.1) * M + pm.gp.cov.ExpQuad(1, 0.1) + 3
+            cov1 = (
+                3
+                + pm.gp.cov.ExpQuad(1, 0.1)
+                + M * pm.gp.cov.ExpQuad(1, 0.1) * M * pm.gp.cov.ExpQuad(1, 0.1)
+            )
+            cov2 = (
+                pm.gp.cov.ExpQuad(1, 0.1) * M * pm.gp.cov.ExpQuad(1, 0.1) * M
+                + pm.gp.cov.ExpQuad(1, 0.1)
+                + 3
+            )
         K1 = theano.function([], cov1(X))()
         K2 = theano.function([], cov2(X))()
         assert np.allclose(K1, K2)
@@ -247,10 +255,16 @@ def test_multiops(self):
         X2 = cartesian(X21, X22)
         X = cartesian(X1, X21, X22)
         with pm.Model() as model:
-            cov1 = 3 + pm.gp.cov.ExpQuad(1, 0.1) + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1)
+            cov1 = (
+                3
+                + pm.gp.cov.ExpQuad(1, 0.1)
+                + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1)
+            )
             cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1)
             cov = pm.gp.cov.Kron([cov1, cov2])
-        K_true = kronecker(theano.function([], cov1(X1))(), theano.function([], cov2(X2))()).eval()
+        K_true = kronecker(
+            theano.function([], cov1(X1))(), theano.function([], cov2(X2))()
+        ).eval()
         K = theano.function([], cov(X))()
         npt.assert_allclose(K_true, K)
 
@@ -269,7 +283,7 @@ def test_slice1(self):
     def test_slice2(self):
         X = np.linspace(0, 1, 30).reshape(10, 3)
         with pm.Model() as model:
-            cov = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1,2])
+            cov = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1, 2])
         K = theano.function([], cov(X))()
         npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3)
         # check diagonal
@@ -279,7 +293,7 @@ def test_slice2(self):
     def test_slice3(self):
         X = np.linspace(0, 1, 30).reshape(10, 3)
         with pm.Model() as model:
-            cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1,2])
+            cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1, 2])
         K = theano.function([], cov(X))()
         npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3)
         # check diagonal
@@ -289,7 +303,9 @@ def test_slice3(self):
     def test_diffslice(self):
         X = np.linspace(0, 1, 30).reshape(10, 3)
         with pm.Model() as model:
-            cov = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 0, 0]) + pm.gp.cov.ExpQuad(3, ls=[0.1, 0.2, 0.3])
+            cov = pm.gp.cov.ExpQuad(
+                3, ls=0.1, active_dims=[1, 0, 0]
+            ) + pm.gp.cov.ExpQuad(3, ls=[0.1, 0.2, 0.3])
         K = theano.function([], cov(X))()
         npt.assert_allclose(K[0, 1], 0.683572, atol=1e-3)
         # check diagonal
@@ -305,7 +321,7 @@ def test_raises(self):
 
 class TestStability(object):
     def test_stable(self):
-        X = np.random.uniform(low=320., high=400., size=[2000, 2])
+        X = np.random.uniform(low=320.0, high=400.0, size=[2000, 2])
         with pm.Model() as model:
             cov = pm.gp.cov.ExpQuad(2, 0.1)
         dists = theano.function([], cov.square_dist(X, X))()
@@ -365,7 +381,7 @@ def test_1d(self):
             cov = pm.gp.cov.WhiteNoise(sigma=0.5)
         K = theano.function([], cov(X))()
         npt.assert_allclose(K[0, 1], 0.0, atol=1e-3)
-        npt.assert_allclose(K[0, 0], 0.5**2, atol=1e-3)
+        npt.assert_allclose(K[0, 0], 0.5 ** 2, atol=1e-3)
         # check diagonal
         Kd = theano.function([], cov(X, diag=True))()
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
@@ -507,11 +523,15 @@ def test_1d(self):
 class TestWarpedInput(object):
     def test_1d(self):
         X = np.linspace(0, 1, 10)[:, None]
+
         def warp_func(x, a, b, c):
             return x + (a * tt.tanh(b * (x - c)))
+
         with pm.Model() as model:
             cov_m52 = pm.gp.cov.Matern52(1, 0.2)
-            cov = pm.gp.cov.WarpedInput(1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52)
+            cov = pm.gp.cov.WarpedInput(
+                1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52
+            )
         K = theano.function([], cov(X))()
         npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3)
         K = theano.function([], cov(X, X))()
@@ -531,8 +551,10 @@ def test_raises(self):
 class TestGibbs(object):
     def test_1d(self):
         X = np.linspace(0, 2, 10)[:, None]
+
         def tanh_func(x, x1, x2, w, x0):
             return (x1 + x2) / 2.0 - (x1 - x2) / 2.0 * tt.tanh((x - x0) / w)
+
         with pm.Model() as model:
             cov = pm.gp.cov.Gibbs(1, tanh_func, args=(0.05, 0.6, 0.4, 1.0))
         K = theano.function([], cov(X))()
@@ -549,17 +571,21 @@ def test_raises(self):
         with pytest.raises(NotImplementedError):
             pm.gp.cov.Gibbs(2, lambda x: x)
         with pytest.raises(NotImplementedError):
-            pm.gp.cov.Gibbs(3, lambda x: x, active_dims=[0,1])
+            pm.gp.cov.Gibbs(3, lambda x: x, active_dims=[0, 1])
 
 
 class TestScaledCov(object):
     def test_1d(self):
         X = np.linspace(0, 1, 10)[:, None]
+
         def scaling_func(x, a, b):
-            return a + b*x
+            return a + b * x
+
         with pm.Model() as model:
             cov_m52 = pm.gp.cov.Matern52(1, 0.2)
-            cov = pm.gp.cov.ScaledCov(1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52)
+            cov = pm.gp.cov.ScaledCov(
+                1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52
+            )
         K = theano.function([], cov(X))()
         npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3)
         K = theano.function([], cov(X, X))()
@@ -580,10 +606,13 @@ class TestHandleArgs(object):
     def test_handleargs(self):
         def func_noargs(x):
             return x
+
         def func_onearg(x, a):
             return x + a
+
         def func_twoarg(x, a, b):
             return x + a + b
+
         x = 100
         a = 2
         b = 3
@@ -612,19 +641,15 @@ def test_full(self):
         with pm.Model() as model:
             B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0])
             npt.assert_allclose(
-                B(np.array([[2, 1.5], [3, -42]])).eval(),
-                self.B[2:4, 2:4]
-                )
+                B(np.array([[2, 1.5], [3, -42]])).eval(), self.B[2:4, 2:4]
+            )
             npt.assert_allclose(B(self.X).eval(), B_mat)
 
     def test_fullB(self):
         B_mat = self.B[self.rand_rows, self.rand_rows.T]
         with pm.Model() as model:
             B = pm.gp.cov.Coregion(1, B=self.B)
-            npt.assert_allclose(
-                B(np.array([[2], [3]])).eval(),
-                self.B[2:4, 2:4]
-                )
+            npt.assert_allclose(B(np.array([[2], [3]])).eval(), self.B[2:4, 2:4])
             npt.assert_allclose(B(self.X).eval(), B_mat)
 
     def test_Xs(self):
@@ -632,9 +657,8 @@ def test_Xs(self):
         with pm.Model() as model:
             B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0])
             npt.assert_allclose(
-                B(np.array([[2, 1.5]]), np.array([[3, -42]])).eval(),
-                self.B[2, 3]
-                )
+                B(np.array([[2, 1.5]]), np.array([[3, -42]])).eval(), self.B[2, 3]
+            )
             npt.assert_allclose(B(self.X, self.Xs).eval(), B_mat)
 
     def test_diag(self):
@@ -642,9 +666,8 @@ def test_diag(self):
         with pm.Model() as model:
             B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0])
             npt.assert_allclose(
-                B(np.array([[2, 1.5]]), diag=True).eval(),
-                np.diag(self.B)[2]
-                )
+                B(np.array([[2, 1.5]]), diag=True).eval(), np.diag(self.B)[2]
+            )
             npt.assert_allclose(B(self.X, diag=True).eval(), B_diag)
 
     def test_raises(self):
@@ -664,19 +687,22 @@ def test_raises3(self):
 
 
 class TestMarginalVsLatent(object):
-    R"""
+    r"""
     Compare the logp of models Marginal, noise=0 and Latent.
     """
+
     def setup_method(self):
-        X = np.random.randn(50,3)
-        y = np.random.randn(50)*0.01
+        X = np.random.randn(50, 3)
+        y = np.random.randn(50) * 0.01
         Xnew = np.random.randn(60, 3)
-        pnew = np.random.randn(60)*0.01
+        pnew = np.random.randn(60) * 0.01
         with pm.Model() as model:
             cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
             mean_func = pm.gp.mean.Constant(0.5)
             gp = pm.gp.Marginal(mean_func, cov_func)
-            f = gp.marginal_likelihood("f", X, y, noise=0.0, is_observed=False, observed=y)
+            f = gp.marginal_likelihood(
+                "f", X, y, noise=0.0, is_observed=False, observed=y
+            )
             p = gp.conditional("p", Xnew)
         self.logp = model.logp({"p": pnew})
         self.X = X
@@ -708,15 +734,16 @@ def testLatent2(self):
 
 
 class TestMarginalVsMarginalSparse(object):
-    R"""
+    r"""
     Compare logp of models Marginal and MarginalSparse.
     Should be nearly equal when inducing points are same as inputs.
     """
+
     def setup_method(self):
-        X = np.random.randn(50,3)
-        y = np.random.randn(50)*0.01
+        X = np.random.randn(50, 3)
+        y = np.random.randn(50) * 0.01
         Xnew = np.random.randn(60, 3)
-        pnew = np.random.randn(60)*0.01
+        pnew = np.random.randn(60) * 0.01
         with pm.Model() as model:
             cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
             mean_func = pm.gp.mean.Constant(0.5)
@@ -732,7 +759,7 @@ def setup_method(self):
         self.pnew = pnew
         self.gp = gp
 
-    @pytest.mark.parametrize('approx', ['FITC', 'VFE', 'DTC'])
+    @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"])
     def testApproximations(self, approx):
         with pm.Model() as model:
             cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
@@ -743,7 +770,7 @@ def testApproximations(self, approx):
         approx_logp = model.logp({"f": self.y, "p": self.pnew})
         npt.assert_allclose(approx_logp, self.logp, atol=0, rtol=1e-2)
 
-    @pytest.mark.parametrize('approx', ['FITC', 'VFE', 'DTC'])
+    @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"])
     def testPredictVar(self, approx):
         with pm.Model() as model:
             cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
@@ -760,7 +787,9 @@ def testPredictCov(self):
             cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
             mean_func = pm.gp.mean.Constant(0.5)
             gp = pm.gp.MarginalSparse(mean_func, cov_func, approx="DTC")
-            f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False)
+            f = gp.marginal_likelihood(
+                "f", self.X, self.X, self.y, self.sigma, is_observed=False
+            )
         mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True)
         mu2, cov2 = gp.predict(self.Xnew, pred_noise=True)
         npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3)
@@ -769,16 +798,20 @@ def testPredictCov(self):
 
 class TestGPAdditive(object):
     def setup_method(self):
-        self.X = np.random.randn(50,3)
-        self.y = np.random.randn(50)*0.01
+        self.X = np.random.randn(50, 3)
+        self.y = np.random.randn(50) * 0.01
         self.Xnew = np.random.randn(60, 3)
         self.noise = pm.gp.cov.WhiteNoise(0.1)
-        self.covs = (pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]),
-                     pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]),
-                     pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]))
-        self.means = (pm.gp.mean.Constant(0.5),
-                      pm.gp.mean.Constant(0.5),
-                      pm.gp.mean.Constant(0.5))
+        self.covs = (
+            pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]),
+            pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]),
+            pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]),
+        )
+        self.means = (
+            pm.gp.mean.Constant(0.5),
+            pm.gp.mean.Constant(0.5),
+            pm.gp.mean.Constant(0.5),
+        )
 
     def testAdditiveMarginal(self):
         with pm.Model() as model1:
@@ -797,15 +830,20 @@ def testAdditiveMarginal(self):
         npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2)
 
         with model1:
-            fp1 = gpsum.conditional("fp1", self.Xnew, given={"X": self.X, "y": self.y,
-                                                            "noise": self.noise, "gp": gpsum})
+            fp1 = gpsum.conditional(
+                "fp1",
+                self.Xnew,
+                given={"X": self.X, "y": self.y, "noise": self.noise, "gp": gpsum},
+            )
         with model2:
             fp2 = gptot.conditional("fp2", self.Xnew)
 
         fp = np.random.randn(self.Xnew.shape[0])
-        npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2)
+        npt.assert_allclose(
+            fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2
+        )
 
-    @pytest.mark.parametrize('approx', ['FITC', 'VFE', 'DTC'])
+    @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"])
     def testAdditiveMarginalSparse(self, approx):
         Xu = np.random.randn(10, 3)
         sigma = 0.1
@@ -819,19 +857,26 @@ def testAdditiveMarginalSparse(self, approx):
             model1_logp = model1.logp({"fsum": self.y})
 
         with pm.Model() as model2:
-            gptot = pm.gp.MarginalSparse(reduce(add, self.means), reduce(add, self.covs), approx=approx)
+            gptot = pm.gp.MarginalSparse(
+                reduce(add, self.means), reduce(add, self.covs), approx=approx
+            )
             fsum = gptot.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma)
             model2_logp = model2.logp({"fsum": self.y})
         npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2)
 
         with model1:
-            fp1 = gpsum.conditional("fp1", self.Xnew, given={"X": self.X, "Xu": Xu, "y": self.y,
-                                                            "sigma": sigma, "gp": gpsum})
+            fp1 = gpsum.conditional(
+                "fp1",
+                self.Xnew,
+                given={"X": self.X, "Xu": Xu, "y": self.y, "sigma": sigma, "gp": gpsum},
+            )
         with model2:
             fp2 = gptot.conditional("fp2", self.Xnew)
 
         fp = np.random.randn(self.Xnew.shape[0])
-        npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2)
+        npt.assert_allclose(
+            fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2
+        )
 
     def testAdditiveLatent(self):
         with pm.Model() as model1:
@@ -850,13 +895,19 @@ def testAdditiveLatent(self):
         npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2)
 
         with model1:
-            fp1 = gpsum.conditional("fp1", self.Xnew, given={"X": self.X, "f": self.y, "gp": gpsum})
+            fp1 = gpsum.conditional(
+                "fp1", self.Xnew, given={"X": self.X, "f": self.y, "gp": gpsum}
+            )
         with model2:
             fp2 = gptot.conditional("fp2", self.Xnew)
 
         fp = np.random.randn(self.Xnew.shape[0])
-        npt.assert_allclose(fp1.logp({"fsum": self.y, "fp1": fp}),
-                            fp2.logp({"fsum": self.y, "fp2": fp}), atol=0, rtol=1e-2)
+        npt.assert_allclose(
+            fp1.logp({"fsum": self.y, "fp1": fp}),
+            fp2.logp({"fsum": self.y, "fp2": fp}),
+            atol=0,
+            rtol=1e-2,
+        )
 
     def testAdditiveSparseRaises(self):
         # cant add different approximations
@@ -885,14 +936,15 @@ def testAdditiveTypeRaises2(self):
 
 
 class TestTP(object):
-    R"""
+    r"""
     Compare TP with high degress of freedom to GP
     """
+
     def setup_method(self):
-        X = np.random.randn(20,3)
-        y = np.random.randn(20)*0.01
+        X = np.random.randn(20, 3)
+        y = np.random.randn(20) * 0.01
         Xnew = np.random.randn(50, 3)
-        pnew = np.random.randn(50)*0.01
+        pnew = np.random.randn(50) * 0.01
         with pm.Model() as model:
             cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
             gp = pm.gp.Latent(cov_func=cov_func)
@@ -923,7 +975,9 @@ def testTPvsLatentReparameterized(self):
         chol = np.linalg.cholesky(cov_func(self.X).eval())
         y_rotated = np.linalg.solve(chol, self.y)
         # testing full model logp unreliable due to introduction of chi2__log__
-        plogp = p.logp({"f_rotated_": y_rotated, "p": self.pnew, "chi2__log__": np.log(1e20)})
+        plogp = p.logp(
+            {"f_rotated_": y_rotated, "p": self.pnew, "chi2__log__": np.log(1e20)}
+        )
         npt.assert_allclose(self.plogp, plogp, atol=0, rtol=1e-2)
 
     def testAdditiveTPRaises(self):
@@ -939,23 +993,30 @@ class TestLatentKron(object):
     """
     Compare gp.LatentKron to gp.Latent, both with Gaussian noise.
     """
+
     def setup_method(self):
-        self.Xs = [np.linspace(0, 1, 7)[:, None],
-                   np.linspace(0, 1, 5)[:, None],
-                   np.linspace(0, 1, 6)[:, None]]
+        self.Xs = [
+            np.linspace(0, 1, 7)[:, None],
+            np.linspace(0, 1, 5)[:, None],
+            np.linspace(0, 1, 6)[:, None],
+        ]
         self.X = cartesian(*self.Xs)
         self.N = np.prod([len(X) for X in self.Xs])
         self.y = np.random.randn(self.N) * 0.1
-        self.Xnews = (np.random.randn(5, 1),
-                      np.random.randn(5, 1),
-                      np.random.randn(5, 1))
+        self.Xnews = (
+            np.random.randn(5, 1),
+            np.random.randn(5, 1),
+            np.random.randn(5, 1),
+        )
         self.Xnew = np.concatenate(self.Xnews, axis=1)
-        self.pnew = np.random.randn(len(self.Xnew))*0.01
+        self.pnew = np.random.randn(len(self.Xnew)) * 0.01
         ls = 0.2
         with pm.Model() as latent_model:
-            self.cov_funcs = (pm.gp.cov.ExpQuad(1, ls),
-                              pm.gp.cov.ExpQuad(1, ls),
-                              pm.gp.cov.ExpQuad(1, ls))
+            self.cov_funcs = (
+                pm.gp.cov.ExpQuad(1, ls),
+                pm.gp.cov.ExpQuad(1, ls),
+                pm.gp.cov.ExpQuad(1, ls),
+            )
             cov_func = pm.gp.cov.Kron(self.cov_funcs)
             self.mean = pm.gp.mean.Constant(0.5)
             gp = pm.gp.Latent(mean_func=self.mean, cov_func=cov_func)
@@ -967,53 +1028,59 @@ def setup_method(self):
 
     def testLatentKronvsLatent(self):
         with pm.Model() as kron_model:
-            kron_gp = pm.gp.LatentKron(mean_func=self.mean,
-                                       cov_funcs=self.cov_funcs)
-            f = kron_gp.prior('f', self.Xs)
-            p = kron_gp.conditional('p', self.Xnew)
-        kronlatent_logp = kron_model.logp({"f_rotated_": self.y_rotated, "p": self.pnew})
+            kron_gp = pm.gp.LatentKron(mean_func=self.mean, cov_funcs=self.cov_funcs)
+            f = kron_gp.prior("f", self.Xs)
+            p = kron_gp.conditional("p", self.Xnew)
+        kronlatent_logp = kron_model.logp(
+            {"f_rotated_": self.y_rotated, "p": self.pnew}
+        )
         npt.assert_allclose(kronlatent_logp, self.logp, atol=0, rtol=1e-3)
 
     def testLatentKronRaisesAdditive(self):
         with pm.Model() as kron_model:
-            gp1 = pm.gp.LatentKron(mean_func=self.mean,
-                                   cov_funcs=self.cov_funcs)
-            gp2 = pm.gp.LatentKron(mean_func=self.mean,
-                                   cov_funcs=self.cov_funcs)
+            gp1 = pm.gp.LatentKron(mean_func=self.mean, cov_funcs=self.cov_funcs)
+            gp2 = pm.gp.LatentKron(mean_func=self.mean, cov_funcs=self.cov_funcs)
         with pytest.raises(TypeError):
             gp1 + gp2
 
     def testLatentKronRaisesSizes(self):
         with pm.Model() as kron_model:
-            gp = pm.gp.LatentKron(mean_func=self.mean,
-                                  cov_funcs=self.cov_funcs)
+            gp = pm.gp.LatentKron(mean_func=self.mean, cov_funcs=self.cov_funcs)
         with pytest.raises(ValueError):
-            gp.prior("f", Xs=[np.linspace(0, 1, 7)[:, None],
-                              np.linspace(0, 1, 5)[:, None]])
+            gp.prior(
+                "f", Xs=[np.linspace(0, 1, 7)[:, None], np.linspace(0, 1, 5)[:, None]]
+            )
 
 
 class TestMarginalKron(object):
     """
     Compare gp.MarginalKron to gp.Marginal.
     """
+
     def setup_method(self):
-        self.Xs = [np.linspace(0, 1, 7)[:, None],
-                   np.linspace(0, 1, 5)[:, None],
-                   np.linspace(0, 1, 6)[:, None]]
+        self.Xs = [
+            np.linspace(0, 1, 7)[:, None],
+            np.linspace(0, 1, 5)[:, None],
+            np.linspace(0, 1, 6)[:, None],
+        ]
         self.X = cartesian(*self.Xs)
         self.N = np.prod([len(X) for X in self.Xs])
         self.y = np.random.randn(self.N) * 0.1
-        self.Xnews = (np.random.randn(5, 1),
-                      np.random.randn(5, 1),
-                      np.random.randn(5, 1))
+        self.Xnews = (
+            np.random.randn(5, 1),
+            np.random.randn(5, 1),
+            np.random.randn(5, 1),
+        )
         self.Xnew = np.concatenate(self.Xnews, axis=1)
         self.sigma = 0.2
-        self.pnew = np.random.randn(len(self.Xnew))*0.01
+        self.pnew = np.random.randn(len(self.Xnew)) * 0.01
         ls = 0.2
         with pm.Model() as model:
-            self.cov_funcs = [pm.gp.cov.ExpQuad(1, ls),
-                              pm.gp.cov.ExpQuad(1, ls),
-                              pm.gp.cov.ExpQuad(1, ls)]
+            self.cov_funcs = [
+                pm.gp.cov.ExpQuad(1, ls),
+                pm.gp.cov.ExpQuad(1, ls),
+                pm.gp.cov.ExpQuad(1, ls),
+            ]
             cov_func = pm.gp.cov.Kron(self.cov_funcs)
             self.mean = pm.gp.mean.Constant(0.5)
             gp = pm.gp.Marginal(mean_func=self.mean, cov_func=cov_func)
@@ -1024,30 +1091,28 @@ def setup_method(self):
 
     def testMarginalKronvsMarginalpredict(self):
         with pm.Model() as kron_model:
-            kron_gp = pm.gp.MarginalKron(mean_func=self.mean,
-                                         cov_funcs=self.cov_funcs)
-            f = kron_gp.marginal_likelihood('f', self.Xs, self.y,
-                                            sigma=self.sigma, shape=self.N)
-            p = kron_gp.conditional('p', self.Xnew)
+            kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs)
+            f = kron_gp.marginal_likelihood(
+                "f", self.Xs, self.y, sigma=self.sigma, shape=self.N
+            )
+            p = kron_gp.conditional("p", self.Xnew)
             mu, cov = kron_gp.predict(self.Xnew)
         npt.assert_allclose(mu, self.mu, atol=0, rtol=1e-2)
         npt.assert_allclose(cov, self.cov, atol=0, rtol=1e-2)
 
     def testMarginalKronvsMarginal(self):
         with pm.Model() as kron_model:
-            kron_gp = pm.gp.MarginalKron(mean_func=self.mean,
-                                         cov_funcs=self.cov_funcs)
-            f = kron_gp.marginal_likelihood('f', self.Xs, self.y,
-                                            sigma=self.sigma, shape=self.N)
-            p = kron_gp.conditional('p', self.Xnew)
-        kron_logp = kron_model.logp({'p': self.pnew})
+            kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs)
+            f = kron_gp.marginal_likelihood(
+                "f", self.Xs, self.y, sigma=self.sigma, shape=self.N
+            )
+            p = kron_gp.conditional("p", self.Xnew)
+        kron_logp = kron_model.logp({"p": self.pnew})
         npt.assert_allclose(kron_logp, self.logp, atol=0, rtol=1e-2)
 
     def testMarginalKronRaises(self):
         with pm.Model() as kron_model:
-            gp1 = pm.gp.MarginalKron(mean_func=self.mean,
-                                     cov_funcs=self.cov_funcs)
-            gp2 = pm.gp.MarginalKron(mean_func=self.mean,
-                                     cov_funcs=self.cov_funcs)
+            gp1 = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs)
+            gp2 = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs)
         with pytest.raises(TypeError):
             gp1 + gp2
diff --git a/pymc3/tests/test_hdf5_backend.py b/pymc3/tests/test_hdf5_backend.py
index ddbf58b08a..1b782e84db 100644
--- a/pymc3/tests/test_hdf5_backend.py
+++ b/pymc3/tests/test_hdf5_backend.py
@@ -4,19 +4,12 @@
 import os
 import tempfile
 
-STATS1 = [{
-    'a': np.float64,
-    'b': np.bool
-}]
-
-STATS2 = [{
-    'a': np.float64
-}, {
-    'a': np.float64,
-    'b': np.int64,
-}]
-
-DBNAME = os.path.join(tempfile.gettempdir(), 'test.h5')
+STATS1 = [{"a": np.float64, "b": np.bool}]
+
+STATS2 = [{"a": np.float64}, {"a": np.float64, "b": np.int64}]
+
+DBNAME = os.path.join(tempfile.gettempdir(), "test.h5")
+
 
 class TestHDF50dSampling(bf.SamplingTestCase):
     backend = hdf5.HDF5
diff --git a/pymc3/tests/test_hmc.py b/pymc3/tests/test_hmc.py
index ed50a0494e..5cfc8c8189 100644
--- a/pymc3/tests/test_hmc.py
+++ b/pymc3/tests/test_hmc.py
@@ -7,7 +7,9 @@
 import pytest
 import logging
 from pymc3.theanof import floatX
-logger = logging.getLogger('pymc3')
+
+logger = logging.getLogger("pymc3")
+
 
 def test_leapfrog_reversible():
     n = 3
@@ -20,7 +22,7 @@ def test_leapfrog_reversible():
     p = floatX(step.potential.random())
     q = floatX(np.random.randn(size))
     start = step.integrator.compute_state(p, q)
-    for epsilon in [.01, .1]:
+    for epsilon in [0.01, 0.1]:
         for n_steps in [1, 2, 3, 4, 20]:
             state = start
             for _ in range(n_steps):
@@ -39,14 +41,17 @@ def test_nuts_tuning():
         trace = pymc3.sample(10, step=step, tune=5, progressbar=False, chains=1)
 
     assert not step.tune
-    assert np.all(trace['step_size'][5:] == trace['step_size'][5])
+    assert np.all(trace["step_size"][5:] == trace["step_size"][5])
+
 
 def test_nuts_error_reporting(caplog):
     model = pymc3.Model()
     with caplog.at_level(logging.ERROR) and pytest.raises(ValueError):
         with model:
-            pymc3.HalfNormal('a', sd=1, transform=None, testval=-1)
-            pymc3.HalfNormal('b', sd=1, transform=None)
-            trace = pymc3.sample(init='adapt_diag', chains=1)
-        assert "Bad initial energy, check any log  probabilities that are inf or -inf: a        -inf\nb" in caplog.text
-
+            pymc3.HalfNormal("a", sd=1, transform=None, testval=-1)
+            pymc3.HalfNormal("b", sd=1, transform=None)
+            trace = pymc3.sample(init="adapt_diag", chains=1)
+        assert (
+            "Bad initial energy, check any log  probabilities that are inf or -inf: a        -inf\nb"
+            in caplog.text
+        )
diff --git a/pymc3/tests/test_math.py b/pymc3/tests/test_math.py
index b231b08127..a90b039ffe 100644
--- a/pymc3/tests/test_math.py
+++ b/pymc3/tests/test_math.py
@@ -4,8 +4,18 @@
 import theano.tensor as tt
 from theano.tests import unittest_tools as utt
 from pymc3.math import (
-    LogDet, logdet, probit, invprobit, expand_packed_triangular,
-    log1pexp, log1mexp, kronecker, cartesian, kron_dot, kron_solve_lower)
+    LogDet,
+    logdet,
+    probit,
+    invprobit,
+    expand_packed_triangular,
+    log1pexp,
+    log1mexp,
+    kronecker,
+    cartesian,
+    kron_dot,
+    kron_solve_lower,
+)
 from .helpers import SeededTest
 import pytest
 from pymc3.theanof import floatX
@@ -14,14 +24,13 @@
 def test_kronecker():
     np.random.seed(1)
     # Create random matrices
-    [a, b, c] = [np.random.rand(3, 3+i) for i in range(3)]
+    [a, b, c] = [np.random.rand(3, 3 + i) for i in range(3)]
 
-    custom = kronecker(a, b, c)       # Custom version
+    custom = kronecker(a, b, c)  # Custom version
     nested = tt.slinalg.kron(a, tt.slinalg.kron(b, c))
     np.testing.assert_array_almost_equal(
-        custom.eval(),
-        nested.eval()   # Standard nested version
-        )
+        custom.eval(), nested.eval()  # Standard nested version
+    )
 
 
 def test_cartesian():
@@ -30,20 +39,21 @@ def test_cartesian():
     b = [0, 2]
     c = [5, 6]
     manual_cartesian = np.array(
-        [[1, 0, 5],
-         [1, 0, 6],
-         [1, 2, 5],
-         [1, 2, 6],
-         [2, 0, 5],
-         [2, 0, 6],
-         [2, 2, 5],
-         [2, 2, 6],
-         [3, 0, 5],
-         [3, 0, 6],
-         [3, 2, 5],
-         [3, 2, 6],
-         ]
-        )
+        [
+            [1, 0, 5],
+            [1, 0, 6],
+            [1, 2, 5],
+            [1, 2, 6],
+            [2, 0, 5],
+            [2, 0, 6],
+            [2, 2, 5],
+            [2, 2, 6],
+            [3, 0, 5],
+            [3, 0, 6],
+            [3, 2, 5],
+            [3, 2, 6],
+        ]
+    )
     auto_cart = cartesian(a, b, c)
     np.testing.assert_array_almost_equal(manual_cartesian, auto_cart)
 
@@ -88,16 +98,19 @@ def test_log1pexp():
     # import mpmath
     # mpmath.mp.dps = 1000
     # [float(mpmath.log(1 + mpmath.exp(x))) for x in vals]
-    expected = np.array([
-        0.0,
-        3.720075976020836e-44,
-        4.539889921686465e-05,
-        0.6930971818099453,
-        0.6931471805599453,
-        0.6931971818099453,
-        10.000045398899218,
-        100.0,
-        1e+20])
+    expected = np.array(
+        [
+            0.0,
+            3.720075976020836e-44,
+            4.539889921686465e-05,
+            0.6930971818099453,
+            0.6931471805599453,
+            0.6931971818099453,
+            10.000045398899218,
+            100.0,
+            1e20,
+        ]
+    )
     actual = log1pexp(vals).eval()
     npt.assert_allclose(actual, expected)
 
@@ -107,14 +120,17 @@ def test_log1mexp():
     # import mpmath
     # mpmath.mp.dps = 1000
     # [float(mpmath.log(1 - mpmath.exp(-x))) for x in vals]
-    expected = np.array([
-        np.nan,
-        -np.inf,
-        -46.051701859880914,
-        -9.210390371559516,
-        -4.540096037048921e-05,
-        -3.720075976020836e-44,
-        0.0])
+    expected = np.array(
+        [
+            np.nan,
+            -np.inf,
+            -46.051701859880914,
+            -9.210390371559516,
+            -4.540096037048921e-05,
+            -3.720075976020836e-44,
+            0.0,
+        ]
+    )
     actual = log1mexp(vals).eval()
     npt.assert_allclose(actual, expected)
 
@@ -140,8 +156,10 @@ def validate(self, input_mat):
         # Test gradient:
         utt.verify_grad(self.op, [input_mat])
 
-    @pytest.mark.skipif(theano.config.device in ["cuda", "gpu"],
-                        reason="No logDet implementation on GPU.")
+    @pytest.mark.skipif(
+        theano.config.device in ["cuda", "gpu"],
+        reason="No logDet implementation on GPU.",
+    )
     def test_basic(self):
         # Calls validate with different params
         test_case_1 = np.random.randn(3, 3) / np.sqrt(3)
@@ -152,11 +170,11 @@ def test_basic(self):
 
 def test_expand_packed_triangular():
     with pytest.raises(ValueError):
-        x = tt.matrix('x')
-        x.tag.test_value = np.array([[1.]])
+        x = tt.matrix("x")
+        x.tag.test_value = np.array([[1.0]])
         expand_packed_triangular(5, x)
     N = 5
-    packed = tt.vector('packed')
+    packed = tt.vector("packed")
     packed.tag.test_value = floatX(np.zeros(N * (N + 1) // 2))
     with pytest.raises(TypeError):
         expand_packed_triangular(packed.shape[0], packed)
@@ -168,9 +186,17 @@ def test_expand_packed_triangular():
     upper_packed = floatX(vals[upper != 0])
     expand_lower = expand_packed_triangular(N, packed, lower=True)
     expand_upper = expand_packed_triangular(N, packed, lower=False)
-    expand_diag_lower = expand_packed_triangular(N, packed, lower=True, diagonal_only=True)
-    expand_diag_upper = expand_packed_triangular(N, packed, lower=False, diagonal_only=True)
+    expand_diag_lower = expand_packed_triangular(
+        N, packed, lower=True, diagonal_only=True
+    )
+    expand_diag_upper = expand_packed_triangular(
+        N, packed, lower=False, diagonal_only=True
+    )
     assert np.all(expand_lower.eval({packed: lower_packed}) == lower)
     assert np.all(expand_upper.eval({packed: upper_packed}) == upper)
-    assert np.all(expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals)))
-    assert np.all(expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals)))
+    assert np.all(
+        expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals))
+    )
+    assert np.all(
+        expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals))
+    )
diff --git a/pymc3/tests/test_memo.py b/pymc3/tests/test_memo.py
index 395595678b..51621f2845 100644
--- a/pymc3/tests/test_memo.py
+++ b/pymc3/tests/test_memo.py
@@ -3,14 +3,15 @@
 
 def getmemo():
     @memoize
-    def f(a, b=('a')):
+    def f(a, b=("a")):
         return str(a) + str(b)
+
     return f
 
 
 def test_memo():
     f = getmemo()
 
-    assert f('x', ['y', 'z']) == "x['y', 'z']"
-    assert f('x', ['a', 'z']) == "x['a', 'z']"
-    assert f('x', ['y', 'z']) == "x['y', 'z']"
+    assert f("x", ["y", "z"]) == "x['y', 'z']"
+    assert f("x", ["a", "z"]) == "x['a', 'z']"
+    assert f("x", ["y", "z"]) == "x['y', 'z']"
diff --git a/pymc3/tests/test_minibatches.py b/pymc3/tests/test_minibatches.py
index 01f4d13361..c93e2a134a 100644
--- a/pymc3/tests/test_minibatches.py
+++ b/pymc3/tests/test_minibatches.py
@@ -17,8 +17,9 @@ class _DataSampler(object):
     """
     Not for users
     """
-    def __init__(self, data, batchsize=50, random_seed=42, dtype='floatX'):
-        self.dtype = theano.config.floatX if dtype == 'floatX' else dtype
+
+    def __init__(self, data, batchsize=50, random_seed=42, dtype="floatX"):
+        self.dtype = theano.config.floatX if dtype == "floatX" else dtype
         self.rng = np.random.RandomState(random_seed)
         self.data = data
         self.n = batchsize
@@ -27,17 +28,15 @@ def __iter__(self):
         return self
 
     def __next__(self):
-        idx = (self.rng
-               .uniform(size=self.n,
-                        low=0.0,
-                        high=self.data.shape[0] - 1e-16)
-               .astype('int64'))
+        idx = self.rng.uniform(
+            size=self.n, low=0.0, high=self.data.shape[0] - 1e-16
+        ).astype("int64")
         return np.asarray(self.data[idx], self.dtype)
 
     next = __next__
 
 
-@pytest.fixture('module')
+@pytest.fixture("module")
 def datagen():
     return _DataSampler(np.random.uniform(size=(1000, 10)))
 
@@ -56,9 +55,8 @@ def integers_ndim(ndim):
         i += 1
 
 
-@pytest.mark.usefixtures('strict_float32')
+@pytest.mark.usefixtures("strict_float32")
 class TestGenerator(object):
-
     def test_basic(self):
         generator = GeneratorAdapter(integers())
         gop = GeneratorOp(generator)()
@@ -130,7 +128,7 @@ def test_gen_cloning_with_shape_change(self, datagen):
         res, _ = theano.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
         assert res.eval().shape == (50,)
         shared = theano.shared(datagen.data.astype(gen.dtype))
-        res2 = theano.clone(res, {gen: shared**2})
+        res2 = theano.clone(res, {gen: shared ** 2})
         assert res2.eval().shape == (1000,)
 
 
@@ -152,13 +150,14 @@ class TestScaling(object):
     """
     Related to minibatch training
     """
+
     def test_density_scaling(self):
         with pm.Model() as model1:
-            Normal('n', observed=[[1]], total_size=1)
+            Normal("n", observed=[[1]], total_size=1)
             p1 = theano.function([], model1.logpt)
 
         with pm.Model() as model2:
-            Normal('n', observed=[[1]], total_size=2)
+            Normal("n", observed=[[1]], total_size=2)
             p2 = theano.function([], model2.logpt)
         assert p1() * 2 == p2()
 
@@ -169,15 +168,16 @@ def true_dens():
             g = gen1()
             for i, point in enumerate(g):
                 yield stats.norm.logpdf(point).sum() * 10
+
         t = true_dens()
         # We have same size models
         with pm.Model() as model1:
-            Normal('n', observed=gen1(), total_size=100)
+            Normal("n", observed=gen1(), total_size=100)
             p1 = theano.function([], model1.logpt)
 
         with pm.Model() as model2:
             gen_var = generator(gen2())
-            Normal('n', observed=gen_var, total_size=100)
+            Normal("n", observed=gen_var, total_size=100)
             p2 = theano.function([], model2.logpt)
 
         for i in range(10):
@@ -190,13 +190,13 @@ def true_dens():
     def test_gradient_with_scaling(self):
         with pm.Model() as model1:
             genvar = generator(gen1())
-            m = Normal('m')
-            Normal('n', observed=genvar, total_size=1000)
+            m = Normal("m")
+            Normal("n", observed=genvar, total_size=1000)
             grad1 = theano.function([m], tt.grad(model1.logpt, m))
         with pm.Model() as model2:
-            m = Normal('m')
+            m = Normal("m")
             shavar = theano.shared(np.ones((1000, 100)))
-            Normal('n', observed=shavar)
+            Normal("n", observed=shavar)
             grad2 = theano.function([m], tt.grad(model2.logpt, m))
 
         for i in range(10):
@@ -207,78 +207,78 @@ def test_gradient_with_scaling(self):
 
     def test_multidim_scaling(self):
         with pm.Model() as model0:
-            Normal('n', observed=[[1, 1],
-                                  [1, 1]], total_size=[])
+            Normal("n", observed=[[1, 1], [1, 1]], total_size=[])
             p0 = theano.function([], model0.logpt)
 
         with pm.Model() as model1:
-            Normal('n', observed=[[1, 1],
-                                  [1, 1]], total_size=[2, 2])
+            Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
             p1 = theano.function([], model1.logpt)
 
         with pm.Model() as model2:
-            Normal('n', observed=[[1],
-                                  [1]], total_size=[2, 2])
+            Normal("n", observed=[[1], [1]], total_size=[2, 2])
             p2 = theano.function([], model2.logpt)
 
         with pm.Model() as model3:
-            Normal('n', observed=[[1, 1]], total_size=[2, 2])
+            Normal("n", observed=[[1, 1]], total_size=[2, 2])
             p3 = theano.function([], model3.logpt)
 
         with pm.Model() as model4:
-            Normal('n', observed=[[1]], total_size=[2, 2])
+            Normal("n", observed=[[1]], total_size=[2, 2])
             p4 = theano.function([], model4.logpt)
 
         with pm.Model() as model5:
-            Normal('n', observed=[[1]], total_size=[2, Ellipsis, 2])
+            Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2])
             p5 = theano.function([], model5.logpt)
         assert p0() == p1() == p2() == p3() == p4() == p5()
 
     def test_common_errors(self):
         with pm.Model():
             with pytest.raises(ValueError) as e:
-                Normal('n', observed=[[1]], total_size=[2, Ellipsis, 2, 2])
-            assert 'Length of' in str(e.value)
+                Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2, 2])
+            assert "Length of" in str(e.value)
             with pytest.raises(ValueError) as e:
-                Normal('n', observed=[[1]], total_size=[2, 2, 2])
-            assert 'Length of' in str(e.value)
+                Normal("n", observed=[[1]], total_size=[2, 2, 2])
+            assert "Length of" in str(e.value)
             with pytest.raises(TypeError) as e:
-                Normal('n', observed=[[1]], total_size='foo')
-            assert 'Unrecognized' in str(e.value)
+                Normal("n", observed=[[1]], total_size="foo")
+            assert "Unrecognized" in str(e.value)
             with pytest.raises(TypeError) as e:
-                Normal('n', observed=[[1]], total_size=['foo'])
-            assert 'Unrecognized' in str(e.value)
+                Normal("n", observed=[[1]], total_size=["foo"])
+            assert "Unrecognized" in str(e.value)
             with pytest.raises(ValueError) as e:
-                Normal('n', observed=[[1]], total_size=[Ellipsis, Ellipsis])
-            assert 'Double Ellipsis' in str(e.value)
+                Normal("n", observed=[[1]], total_size=[Ellipsis, Ellipsis])
+            assert "Double Ellipsis" in str(e.value)
 
     def test_mixed1(self):
         with pm.Model():
             data = np.random.rand(10, 20, 30, 40, 50)
             mb = pm.Minibatch(data, [2, None, 20, Ellipsis, 10])
-            Normal('n', observed=mb, total_size=(10, None, 30, Ellipsis, 50))
+            Normal("n", observed=mb, total_size=(10, None, 30, Ellipsis, 50))
 
     def test_mixed2(self):
         with pm.Model():
             data = np.random.rand(10, 20, 30, 40, 50)
             mb = pm.Minibatch(data, [2, None, 20])
-            Normal('n', observed=mb, total_size=(10, None, 30))
+            Normal("n", observed=mb, total_size=(10, None, 30))
 
     def test_free_rv(self):
         with pm.Model() as model4:
-            Normal('n', observed=[[1, 1],
-                                  [1, 1]], total_size=[2, 2])
+            Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2])
             p4 = theano.function([], model4.logpt)
 
         with pm.Model() as model5:
-            Normal('n', total_size=[2, Ellipsis, 2], shape=(1, 1), broadcastable=(False, False))
+            Normal(
+                "n",
+                total_size=[2, Ellipsis, 2],
+                shape=(1, 1),
+                broadcastable=(False, False),
+            )
             p5 = theano.function([model5.n], model5.logpt)
         assert p4() == p5(pm.floatX([[1]]))
-        assert p4() == p5(pm.floatX([[1, 1],
-                                     [1, 1]]))
+        assert p4() == p5(pm.floatX([[1, 1], [1, 1]]))
 
 
-@pytest.mark.usefixtures('strict_float32')
+@pytest.mark.usefixtures("strict_float32")
 class TestMinibatch(object):
     data = np.random.rand(30, 10, 40, 10, 50)
 
diff --git a/pymc3/tests/test_missing.py b/pymc3/tests/test_missing.py
index 1258ed3ea2..0da46ef27d 100644
--- a/pymc3/tests/test_missing.py
+++ b/pymc3/tests/test_missing.py
@@ -7,8 +7,8 @@
 def test_missing():
     data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
     with Model() as model:
-        x = Normal('x', 1, 1)
-        Normal('y', x, 1, observed=data)
+        x = Normal("x", 1, 1)
+        Normal("y", x, 1, observed=data)
 
     y_missing, = model.missing_values
     assert y_missing.tag.test_value.shape == (2,)
@@ -19,8 +19,8 @@ def test_missing():
 def test_missing_pandas():
     data = pd.DataFrame([1, 2, numpy.nan, 4, numpy.nan])
     with Model() as model:
-        x = Normal('x', 1, 1)
-        Normal('y', x, 1, observed=data)
+        x = Normal("x", 1, 1)
+        Normal("y", x, 1, observed=data)
 
     y_missing, = model.missing_values
     assert y_missing.tag.test_value.shape == (2,)
diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py
index 7ee53a8753..d4dea35970 100644
--- a/pymc3/tests/test_mixture.py
+++ b/pymc3/tests/test_mixture.py
@@ -2,8 +2,20 @@
 from numpy.testing import assert_allclose
 
 from .helpers import SeededTest
-from pymc3 import Dirichlet, Gamma, Normal, Lognormal, Poisson, Exponential, \
-    Mixture, NormalMixture, MvNormal, sample, Metropolis, Model
+from pymc3 import (
+    Dirichlet,
+    Gamma,
+    Normal,
+    Lognormal,
+    Poisson,
+    Exponential,
+    Mixture,
+    NormalMixture,
+    MvNormal,
+    sample,
+    Metropolis,
+    Model,
+)
 import scipy.stats as st
 from scipy.special import logsumexp
 from pymc3.theanof import floatX
@@ -30,188 +42,200 @@ def setup_class(cls):
         super(TestMixture, cls).setup_class()
 
         cls.norm_w = np.array([0.75, 0.25])
-        cls.norm_mu = np.array([0., 5.])
+        cls.norm_mu = np.array([0.0, 5.0])
         cls.norm_sd = np.ones_like(cls.norm_mu)
-        cls.norm_x = generate_normal_mixture_data(cls.norm_w, cls.norm_mu, cls.norm_sd, size=1000)
+        cls.norm_x = generate_normal_mixture_data(
+            cls.norm_w, cls.norm_mu, cls.norm_sd, size=1000
+        )
 
         cls.pois_w = np.array([0.4, 0.6])
-        cls.pois_mu = np.array([5., 20.])
+        cls.pois_mu = np.array([5.0, 20.0])
         cls.pois_x = generate_poisson_mixture_data(cls.pois_w, cls.pois_mu, size=1000)
 
     def test_mixture_list_of_normals(self):
         with Model() as model:
-            w = Dirichlet('w', floatX(np.ones_like(self.norm_w)))
-            mu = Normal('mu', 0., 10., shape=self.norm_w.size)
-            tau = Gamma('tau', 1., 1., shape=self.norm_w.size)
-            Mixture('x_obs', w,
-                    [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])],
-                    observed=self.norm_x)
+            w = Dirichlet("w", floatX(np.ones_like(self.norm_w)))
+            mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size)
+            tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size)
+            Mixture(
+                "x_obs",
+                w,
+                [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])],
+                observed=self.norm_x,
+            )
             step = Metropolis()
-            trace = sample(5000, step, random_seed=self.random_seed,
-                           progressbar=False, chains=1)
+            trace = sample(
+                5000, step, random_seed=self.random_seed, progressbar=False, chains=1
+            )
 
-        assert_allclose(np.sort(trace['w'].mean(axis=0)),
-                        np.sort(self.norm_w),
-                        rtol=0.1, atol=0.1)
-        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
-                        np.sort(self.norm_mu),
-                        rtol=0.1, atol=0.1)
+        assert_allclose(
+            np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w), rtol=0.1, atol=0.1
+        )
+        assert_allclose(
+            np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu), rtol=0.1, atol=0.1
+        )
 
     def test_normal_mixture(self):
         with Model() as model:
-            w = Dirichlet('w', floatX(np.ones_like(self.norm_w)))
-            mu = Normal('mu', 0., 10., shape=self.norm_w.size)
-            tau = Gamma('tau', 1., 1., shape=self.norm_w.size)
-            NormalMixture('x_obs', w, mu, tau=tau, observed=self.norm_x)
+            w = Dirichlet("w", floatX(np.ones_like(self.norm_w)))
+            mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size)
+            tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size)
+            NormalMixture("x_obs", w, mu, tau=tau, observed=self.norm_x)
             step = Metropolis()
-            trace = sample(5000, step, random_seed=self.random_seed,
-                           progressbar=False, chains=1)
+            trace = sample(
+                5000, step, random_seed=self.random_seed, progressbar=False, chains=1
+            )
 
-        assert_allclose(np.sort(trace['w'].mean(axis=0)),
-                        np.sort(self.norm_w),
-                        rtol=0.1, atol=0.1)
-        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
-                        np.sort(self.norm_mu),
-                        rtol=0.1, atol=0.1)
+        assert_allclose(
+            np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w), rtol=0.1, atol=0.1
+        )
+        assert_allclose(
+            np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu), rtol=0.1, atol=0.1
+        )
 
     def test_normal_mixture_nd(self):
         nd, ncomp = 3, 5
 
         with Model() as model0:
-            mus = Normal('mus', shape=(nd, ncomp))
-            taus = Gamma('taus', alpha=1, beta=1, shape=(nd, ncomp))
-            ws = Dirichlet('ws', np.ones(ncomp))
-            mixture0 = NormalMixture('m', w=ws, mu=mus, tau=taus, shape=nd)
+            mus = Normal("mus", shape=(nd, ncomp))
+            taus = Gamma("taus", alpha=1, beta=1, shape=(nd, ncomp))
+            ws = Dirichlet("ws", np.ones(ncomp))
+            mixture0 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
 
         with Model() as model1:
-            mus = Normal('mus', shape=(nd, ncomp))
-            taus = Gamma('taus', alpha=1, beta=1, shape=(nd, ncomp))
-            ws = Dirichlet('ws', np.ones(ncomp))
-            comp_dist = [Normal.dist(mu=mus[:, i], tau=taus[:, i])
-                         for i in range(ncomp)]
-            mixture1 = Mixture('m', w=ws, comp_dists=comp_dist, shape=nd)
+            mus = Normal("mus", shape=(nd, ncomp))
+            taus = Gamma("taus", alpha=1, beta=1, shape=(nd, ncomp))
+            ws = Dirichlet("ws", np.ones(ncomp))
+            comp_dist = [
+                Normal.dist(mu=mus[:, i], tau=taus[:, i]) for i in range(ncomp)
+            ]
+            mixture1 = Mixture("m", w=ws, comp_dists=comp_dist, shape=nd)
 
         testpoint = model0.test_point
-        testpoint['mus'] = np.random.randn(nd, ncomp)
+        testpoint["mus"] = np.random.randn(nd, ncomp)
         assert_allclose(model0.logp(testpoint), model1.logp(testpoint))
         assert_allclose(mixture0.logp(testpoint), mixture1.logp(testpoint))
 
     def test_poisson_mixture(self):
         with Model() as model:
-            w = Dirichlet('w', floatX(np.ones_like(self.pois_w)))
-            mu = Gamma('mu', 1., 1., shape=self.pois_w.size)
-            Mixture('x_obs', w, Poisson.dist(mu), observed=self.pois_x)
+            w = Dirichlet("w", floatX(np.ones_like(self.pois_w)))
+            mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
+            Mixture("x_obs", w, Poisson.dist(mu), observed=self.pois_x)
             step = Metropolis()
-            trace = sample(5000, step, random_seed=self.random_seed,
-                           progressbar=False, chains=1)
+            trace = sample(
+                5000, step, random_seed=self.random_seed, progressbar=False, chains=1
+            )
 
-        assert_allclose(np.sort(trace['w'].mean(axis=0)),
-                        np.sort(self.pois_w),
-                        rtol=0.1, atol=0.1)
-        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
-                        np.sort(self.pois_mu),
-                        rtol=0.1, atol=0.1)
+        assert_allclose(
+            np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1
+        )
+        assert_allclose(
+            np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1
+        )
 
     def test_mixture_list_of_poissons(self):
         with Model() as model:
-            w = Dirichlet('w', floatX(np.ones_like(self.pois_w)))
-            mu = Gamma('mu', 1., 1., shape=self.pois_w.size)
-            Mixture('x_obs', w,
-                    [Poisson.dist(mu[0]), Poisson.dist(mu[1])],
-                    observed=self.pois_x)
+            w = Dirichlet("w", floatX(np.ones_like(self.pois_w)))
+            mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
+            Mixture(
+                "x_obs",
+                w,
+                [Poisson.dist(mu[0]), Poisson.dist(mu[1])],
+                observed=self.pois_x,
+            )
             step = Metropolis()
-            trace = sample(5000, step, random_seed=self.random_seed,
-                           progressbar=False, chains=1)
+            trace = sample(
+                5000, step, random_seed=self.random_seed, progressbar=False, chains=1
+            )
 
-        assert_allclose(np.sort(trace['w'].mean(axis=0)),
-                        np.sort(self.pois_w),
-                        rtol=0.1, atol=0.1)
-        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
-                        np.sort(self.pois_mu),
-                        rtol=0.1, atol=0.1)
+        assert_allclose(
+            np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1
+        )
+        assert_allclose(
+            np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1
+        )
 
     def test_mixture_of_mvn(self):
-        mu1 = np.asarray([0., 1.])
+        mu1 = np.asarray([0.0, 1.0])
         cov1 = np.diag([1.5, 2.5])
-        mu2 = np.asarray([1., 0.])
+        mu2 = np.asarray([1.0, 0.0])
         cov2 = np.diag([2.5, 3.5])
-        obs = np.asarray([[.5, .5], mu1, mu2])
+        obs = np.asarray([[0.5, 0.5], mu1, mu2])
         with Model() as model:
-            w = Dirichlet('w', floatX(np.ones(2)), transform=None)
+            w = Dirichlet("w", floatX(np.ones(2)), transform=None)
             mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
             mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
-            y = Mixture('x_obs', w, [mvncomp1, mvncomp2],
-                    observed=obs)
+            y = Mixture("x_obs", w, [mvncomp1, mvncomp2], observed=obs)
 
         # check logp of each component
-        complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1),
-                                 st.multivariate_normal.logpdf(obs, mu2, cov2))
-                                ).T
+        complogp_st = np.vstack(
+            (
+                st.multivariate_normal.logpdf(obs, mu1, cov1),
+                st.multivariate_normal.logpdf(obs, mu2, cov2),
+            )
+        ).T
         complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
         assert_allclose(complogp, complogp_st)
 
         # check logp of mixture
         testpoint = model.test_point
-        mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st,
-                               axis=-1, keepdims=True)
-        assert_allclose(y.logp_elemwise(testpoint),
-                        mixlogp_st)
+        mixlogp_st = logsumexp(
+            np.log(testpoint["w"]) + complogp_st, axis=-1, keepdims=True
+        )
+        assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)
 
         # check logp of model
-        priorlogp = st.dirichlet.logpdf(x=testpoint['w'],
-                                        alpha=np.ones(2),
-                                        )
-        assert_allclose(model.logp(testpoint),
-                        mixlogp_st.sum() + priorlogp)
+        priorlogp = st.dirichlet.logpdf(x=testpoint["w"], alpha=np.ones(2))
+        assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
 
     def test_mixture_of_mixture(self):
         nbr = 4
         with Model() as model:
             # mixtures components
             g_comp = Normal.dist(
-                mu=Exponential('mu_g', lam=1.0, shape=nbr, transform=None),
+                mu=Exponential("mu_g", lam=1.0, shape=nbr, transform=None),
                 sd=1,
-                shape=nbr)
+                shape=nbr,
+            )
             l_comp = Lognormal.dist(
-                mu=Exponential('mu_l', lam=1.0, shape=nbr, transform=None),
+                mu=Exponential("mu_l", lam=1.0, shape=nbr, transform=None),
                 sd=1,
-                shape=nbr)
+                shape=nbr,
+            )
             # weight vector for the mixtures
-            g_w = Dirichlet('g_w', a=floatX(np.ones(nbr)*0.0000001), transform=None)
-            l_w = Dirichlet('l_w', a=floatX(np.ones(nbr)*0.0000001), transform=None)
+            g_w = Dirichlet("g_w", a=floatX(np.ones(nbr) * 0.0000001), transform=None)
+            l_w = Dirichlet("l_w", a=floatX(np.ones(nbr) * 0.0000001), transform=None)
             # mixture components
             g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
             l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
             # mixture of mixtures
-            mix_w = Dirichlet('mix_w', a=floatX(np.ones(2)), transform=None)
-            mix = Mixture('mix', w=mix_w,
-                          comp_dists=[g_mix, l_mix],
-                          observed=np.exp(self.norm_x))
+            mix_w = Dirichlet("mix_w", a=floatX(np.ones(2)), transform=None)
+            mix = Mixture(
+                "mix", w=mix_w, comp_dists=[g_mix, l_mix], observed=np.exp(self.norm_x)
+            )
 
         test_point = model.test_point
 
         def mixmixlogp(value, point):
-            priorlogp = st.dirichlet.logpdf(x=point['g_w'],
-                                            alpha=np.ones(nbr)*0.0000001,
-                                            ) + \
-                        st.expon.logpdf(x=point['mu_g']).sum() + \
-                        st.dirichlet.logpdf(x=point['l_w'],
-                                            alpha=np.ones(nbr)*0.0000001,
-                                            ) + \
-                        st.expon.logpdf(x=point['mu_l']).sum() + \
-                        st.dirichlet.logpdf(x=point['mix_w'],
-                                            alpha=np.ones(2),
-                                            )
-            complogp1 = st.norm.logpdf(x=value,
-                                       loc=point['mu_g'])
-            mixlogp1 = logsumexp(np.log(point['g_w']) + complogp1,
-                                 axis=-1, keepdims=True)
-            complogp2 = st.lognorm.logpdf(value, 1., 0., np.exp(point['mu_l']))
-            mixlogp2 = logsumexp(np.log(point['l_w']) + complogp2,
-                                 axis=-1, keepdims=True)
+            priorlogp = (
+                st.dirichlet.logpdf(x=point["g_w"], alpha=np.ones(nbr) * 0.0000001)
+                + st.expon.logpdf(x=point["mu_g"]).sum()
+                + st.dirichlet.logpdf(x=point["l_w"], alpha=np.ones(nbr) * 0.0000001)
+                + st.expon.logpdf(x=point["mu_l"]).sum()
+                + st.dirichlet.logpdf(x=point["mix_w"], alpha=np.ones(2))
+            )
+            complogp1 = st.norm.logpdf(x=value, loc=point["mu_g"])
+            mixlogp1 = logsumexp(
+                np.log(point["g_w"]) + complogp1, axis=-1, keepdims=True
+            )
+            complogp2 = st.lognorm.logpdf(value, 1.0, 0.0, np.exp(point["mu_l"]))
+            mixlogp2 = logsumexp(
+                np.log(point["l_w"]) + complogp2, axis=-1, keepdims=True
+            )
             complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
-            mixmixlogpg = logsumexp(np.log(point['mix_w']) + complogp_mix,
-                                    axis=-1, keepdims=True)
+            mixmixlogpg = logsumexp(
+                np.log(point["mix_w"]) + complogp_mix, axis=-1, keepdims=True
+            )
             return priorlogp, mixmixlogpg
 
         value = np.exp(self.norm_x)[:, None]
@@ -221,13 +245,11 @@ def mixmixlogp(value, point):
         assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))
 
         # check model logp
-        assert_allclose(priorlogp + mixmixlogpg.sum(),
-                        model.logp(test_point))
+        assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point))
 
         # check input and check logp again
-        test_point['g_w'] = np.asarray([.1, .1, .2, .6])
-        test_point['mu_g'] = np.exp(np.random.randn(nbr))
+        test_point["g_w"] = np.asarray([0.1, 0.1, 0.2, 0.6])
+        test_point["mu_g"] = np.exp(np.random.randn(nbr))
         priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
         assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))
-        assert_allclose(priorlogp + mixmixlogpg.sum(),
-                        model.logp(test_point))
+        assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point))
diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py
index 473ff3a2a4..e7163415d1 100644
--- a/pymc3/tests/test_model.py
+++ b/pymc3/tests/test_model.py
@@ -12,41 +12,41 @@
 
 
 class NewModel(pm.Model):
-    def __init__(self, name='', model=None):
+    def __init__(self, name="", model=None):
         super(NewModel, self).__init__(name, model)
         assert pm.modelcontext(None) is self
         # 1) init variables with Var method
-        self.Var('v1', pm.Normal.dist())
-        self.v2 = pm.Normal('v2', mu=0, sd=1)
+        self.Var("v1", pm.Normal.dist())
+        self.v2 = pm.Normal("v2", mu=0, sd=1)
         # 2) Potentials and Deterministic variables with method too
         # be sure that names will not overlap with other same models
-        pm.Deterministic('d', tt.constant(1))
-        pm.Potential('p', tt.constant(1))
+        pm.Deterministic("d", tt.constant(1))
+        pm.Potential("p", tt.constant(1))
 
 
 class DocstringModel(pm.Model):
-    def __init__(self, mean=0, sd=1, name='', model=None):
+    def __init__(self, mean=0, sd=1, name="", model=None):
         super(DocstringModel, self).__init__(name, model)
-        self.Var('v1', Normal.dist(mu=mean, sd=sd))
-        Normal('v2', mu=mean, sd=sd)
-        Normal('v3', mu=mean, sd=HalfCauchy('sd', beta=10, testval=1.))
-        Deterministic('v3_sq', self.v3 ** 2)
-        Potential('p1', tt.constant(1))
+        self.Var("v1", Normal.dist(mu=mean, sd=sd))
+        Normal("v2", mu=mean, sd=sd)
+        Normal("v3", mu=mean, sd=HalfCauchy("sd", beta=10, testval=1.0))
+        Deterministic("v3_sq", self.v3 ** 2)
+        Potential("p1", tt.constant(1))
 
 
 class TestBaseModel(object):
     def test_setattr_properly_works(self):
         with pm.Model() as model:
-            pm.Normal('v1')
+            pm.Normal("v1")
             assert len(model.vars) == 1
-            with pm.Model('sub') as submodel:
-                submodel.Var('v1', pm.Normal.dist())
-                assert hasattr(submodel, 'v1')
+            with pm.Model("sub") as submodel:
+                submodel.Var("v1", pm.Normal.dist())
+                assert hasattr(submodel, "v1")
                 assert len(submodel.vars) == 1
             assert len(model.vars) == 2
             with submodel:
-                submodel.Var('v2', pm.Normal.dist())
-                assert hasattr(submodel, 'v2')
+                submodel.Var("v2", pm.Normal.dist())
+                assert hasattr(submodel, "v2")
                 assert len(submodel.vars) == 2
             assert len(model.vars) == 3
 
@@ -55,26 +55,26 @@ def test_context_passes_vars_to_parent_model(self):
             # a set of variables is created
             NewModel()
             # another set of variables are created but with prefix 'another'
-            usermodel2 = NewModel(name='another')
+            usermodel2 = NewModel(name="another")
             # you can enter in a context with submodel
             with usermodel2:
-                usermodel2.Var('v3', pm.Normal.dist())
-                pm.Normal('v4')
+                usermodel2.Var("v3", pm.Normal.dist())
+                pm.Normal("v4")
                 # this variable is created in parent model too
-        assert 'another_v2' in model.named_vars
-        assert 'another_v3' in model.named_vars
-        assert 'another_v3' in usermodel2.named_vars
-        assert 'another_v4' in model.named_vars
-        assert 'another_v4' in usermodel2.named_vars
-        assert hasattr(usermodel2, 'v3')
-        assert hasattr(usermodel2, 'v2')
-        assert hasattr(usermodel2, 'v4')
+        assert "another_v2" in model.named_vars
+        assert "another_v3" in model.named_vars
+        assert "another_v3" in usermodel2.named_vars
+        assert "another_v4" in model.named_vars
+        assert "another_v4" in usermodel2.named_vars
+        assert hasattr(usermodel2, "v3")
+        assert hasattr(usermodel2, "v2")
+        assert hasattr(usermodel2, "v4")
         # When you create a class based model you should follow some rules
         with model:
-            m = NewModel('one_more')
-        assert m.d is model['one_more_d']
-        assert m['d'] is model['one_more_d']
-        assert m['one_more_d'] is model['one_more_d']
+            m = NewModel("one_more")
+        assert m.d is model["one_more_d"]
+        assert m["d"] is model["one_more_d"]
+        assert m["one_more_d"] is model["one_more_d"]
 
 
 class TestNested(object):
@@ -84,37 +84,37 @@ def test_nest_context_works(self):
             with new:
                 assert pm.modelcontext(None) is new
             assert pm.modelcontext(None) is m
-        assert 'v1' in m.named_vars
-        assert 'v2' in m.named_vars
+        assert "v1" in m.named_vars
+        assert "v2" in m.named_vars
 
     def test_named_context(self):
         with pm.Model() as m:
-            NewModel(name='new')
-        assert 'new_v1' in m.named_vars
-        assert 'new_v2' in m.named_vars
+            NewModel(name="new")
+        assert "new_v1" in m.named_vars
+        assert "new_v2" in m.named_vars
 
     def test_docstring_example1(self):
         usage1 = DocstringModel()
-        assert 'v1' in usage1.named_vars
-        assert 'v2' in usage1.named_vars
-        assert 'v3' in usage1.named_vars
-        assert 'v3_sq' in usage1.named_vars
-        assert len(usage1.potentials), 1
+        assert "v1" in usage1.named_vars
+        assert "v2" in usage1.named_vars
+        assert "v3" in usage1.named_vars
+        assert "v3_sq" in usage1.named_vars
+        assert len(usage1.potentials) == 1
 
     def test_docstring_example2(self):
         with pm.Model() as model:
-            DocstringModel(name='prefix')
-        assert 'prefix_v1' in model.named_vars
-        assert 'prefix_v2' in model.named_vars
-        assert 'prefix_v3' in model.named_vars
-        assert 'prefix_v3_sq' in model.named_vars
-        assert len(model.potentials), 1
+            DocstringModel(name="prefix")
+        assert "prefix_v1" in model.named_vars
+        assert "prefix_v2" in model.named_vars
+        assert "prefix_v3" in model.named_vars
+        assert "prefix_v3_sq" in model.named_vars
+        assert len(model.potentials) == 1
 
     def test_duplicates_detection(self):
         with pm.Model():
-            DocstringModel(name='prefix')
+            DocstringModel(name="prefix")
             with pytest.raises(ValueError):
-                DocstringModel(name='prefix')
+                DocstringModel(name="prefix")
 
     def test_model_root(self):
         with pm.Model() as model:
@@ -127,15 +127,15 @@ class TestObserved(object):
     def test_observed_rv_fail(self):
         with pytest.raises(TypeError):
             with pm.Model():
-                x = Normal('x')
-                Normal('n', observed=x)
+                x = Normal("x")
+                Normal("n", observed=x)
 
     def test_observed_type(self):
         X_ = np.random.randn(100, 5)
         X = pm.floatX(theano.shared(X_))
         with pm.Model():
-            x1 = pm.Normal('x1', observed=X_)
-            x2 = pm.Normal('x2', observed=X)
+            x1 = pm.Normal("x1", observed=X_)
+            x2 = pm.Normal("x2", observed=X)
 
         assert x1.type == X.type
         assert x2.type == X.type
@@ -143,89 +143,89 @@ def test_observed_type(self):
 
 class TestTheanoConfig(object):
     def test_set_testval_raise(self):
-        with theano.configparser.change_flags(compute_test_value='off'):
+        with theano.configparser.change_flags(compute_test_value="off"):
             with pm.Model():
-                assert theano.config.compute_test_value == 'raise'
-            assert theano.config.compute_test_value == 'off'
+                assert theano.config.compute_test_value == "raise"
+            assert theano.config.compute_test_value == "off"
 
     def test_nested(self):
-        with theano.configparser.change_flags(compute_test_value='off'):
-            with pm.Model(theano_config={'compute_test_value': 'ignore'}):
-                assert theano.config.compute_test_value == 'ignore'
-                with pm.Model(theano_config={'compute_test_value': 'warn'}):
-                    assert theano.config.compute_test_value == 'warn'
-                assert theano.config.compute_test_value == 'ignore'
-            assert theano.config.compute_test_value == 'off'
+        with theano.configparser.change_flags(compute_test_value="off"):
+            with pm.Model(theano_config={"compute_test_value": "ignore"}):
+                assert theano.config.compute_test_value == "ignore"
+                with pm.Model(theano_config={"compute_test_value": "warn"}):
+                    assert theano.config.compute_test_value == "warn"
+                assert theano.config.compute_test_value == "ignore"
+            assert theano.config.compute_test_value == "off"
 
 
 def test_duplicate_vars():
     with pytest.raises(ValueError) as err:
         with pm.Model():
-            pm.Normal('a')
-            pm.Normal('a')
-    err.match('already exists')
+            pm.Normal("a")
+            pm.Normal("a")
+    err.match("already exists")
 
     with pytest.raises(ValueError) as err:
         with pm.Model():
-            pm.Normal('a')
-            pm.Normal('a', transform=transforms.log)
-    err.match('already exists')
+            pm.Normal("a")
+            pm.Normal("a", transform=transforms.log)
+    err.match("already exists")
 
     with pytest.raises(ValueError) as err:
         with pm.Model():
-            a = pm.Normal('a')
-            pm.Potential('a', a**2)
-    err.match('already exists')
+            a = pm.Normal("a")
+            pm.Potential("a", a ** 2)
+    err.match("already exists")
 
     with pytest.raises(ValueError) as err:
         with pm.Model():
-            pm.Binomial('a', 10, .5)
-            pm.Normal('a', transform=transforms.log)
-    err.match('already exists')
+            pm.Binomial("a", 10, 0.5)
+            pm.Normal("a", transform=transforms.log)
+    err.match("already exists")
 
 
 def test_empty_observed():
     data = pd.DataFrame(np.ones((2, 3)) / 3)
     data.values[:] = np.nan
     with pm.Model():
-        a = pm.Normal('a', observed=data)
+        a = pm.Normal("a", observed=data)
         npt.assert_allclose(a.tag.test_value, np.zeros((2, 3)))
-        b = pm.Beta('b', alpha=1, beta=1, observed=data)
+        b = pm.Beta("b", alpha=1, beta=1, observed=data)
         npt.assert_allclose(b.tag.test_value, np.ones((2, 3)) / 2)
 
 
 class TestValueGradFunction(unittest.TestCase):
     def test_no_extra(self):
-        a = tt.vector('a')
+        a = tt.vector("a")
         a.tag.test_value = np.zeros(3, dtype=a.dtype)
         a.dshape = (3,)
         a.dsize = 3
-        f_grad = ValueGradFunction(a.sum(), [a], [], mode='FAST_COMPILE')
+        f_grad = ValueGradFunction(a.sum(), [a], [], mode="FAST_COMPILE")
         assert f_grad.size == 3
 
     def test_invalid_type(self):
-        a = tt.ivector('a')
+        a = tt.ivector("a")
         a.tag.test_value = np.zeros(3, dtype=a.dtype)
         a.dshape = (3,)
         a.dsize = 3
         with pytest.raises(TypeError) as err:
-            ValueGradFunction(a.sum(), [a], [], mode='FAST_COMPILE')
-        err.match('Invalid dtype')
+            ValueGradFunction(a.sum(), [a], [], mode="FAST_COMPILE")
+        err.match("Invalid dtype")
 
     def setUp(self):
-        extra1 = tt.iscalar('extra1')
+        extra1 = tt.iscalar("extra1")
         extra1_ = np.array(0, dtype=extra1.dtype)
         extra1.tag.test_value = extra1_
         extra1.dshape = tuple()
         extra1.dsize = 1
 
-        val1 = tt.vector('val1')
+        val1 = tt.vector("val1")
         val1_ = np.zeros(3, dtype=val1.dtype)
         val1.tag.test_value = val1_
         val1.dshape = (3,)
         val1.dsize = 3
 
-        val2 = tt.matrix('val2')
+        val2 = tt.matrix("val2")
         val2_ = np.zeros((2, 3), dtype=val2.dtype)
         val2.tag.test_value = val2_
         val2.dshape = (2, 3)
@@ -238,47 +238,47 @@ def setUp(self):
         self.cost = extra1 * val1.sum() + val2.sum()
 
         self.f_grad = ValueGradFunction(
-            self.cost, [val1, val2], [extra1], mode='FAST_COMPILE')
+            self.cost, [val1, val2], [extra1], mode="FAST_COMPILE"
+        )
 
     def test_extra_not_set(self):
         with pytest.raises(ValueError) as err:
             self.f_grad.get_extra_values()
-        err.match('Extra values are not set')
+        err.match("Extra values are not set")
 
         with pytest.raises(ValueError) as err:
             self.f_grad(np.zeros(self.f_grad.size, dtype=self.f_grad.dtype))
-        err.match('Extra values are not set')
+        err.match("Extra values are not set")
 
     def test_grad(self):
-        self.f_grad.set_extra_values({'extra1': 5})
+        self.f_grad.set_extra_values({"extra1": 5})
         array = np.ones(self.f_grad.size, dtype=self.f_grad.dtype)
         val, grad = self.f_grad(array)
         assert val == 21
         npt.assert_allclose(grad, [5, 5, 5, 1, 1, 1, 1, 1, 1])
 
     def test_bij(self):
-        self.f_grad.set_extra_values({'extra1': 5})
+        self.f_grad.set_extra_values({"extra1": 5})
         array = np.ones(self.f_grad.size, dtype=self.f_grad.dtype)
         point = self.f_grad.array_to_dict(array)
         assert len(point) == 2
-        npt.assert_allclose(point['val1'], 1)
-        npt.assert_allclose(point['val2'], 1)
+        npt.assert_allclose(point["val1"], 1)
+        npt.assert_allclose(point["val2"], 1)
 
         array2 = self.f_grad.dict_to_array(point)
         npt.assert_allclose(array2, array)
         point_ = self.f_grad.array_to_full_dict(array)
         assert len(point_) == 3
-        assert point_['extra1'] == 5
+        assert point_["extra1"] == 5
 
     def test_edge_case(self):
         # Edge case discovered in #2948
         ndim = 3
         with pm.Model() as m:
-            pm.Lognormal('sigma',
-                         mu=np.zeros(ndim),
-                         tau=np.ones(ndim),
-                         shape=ndim)  # variance for the correlation matrix
-            pm.HalfCauchy('nu', beta=10)
+            pm.Lognormal(
+                "sigma", mu=np.zeros(ndim), tau=np.ones(ndim), shape=ndim
+            )  # variance for the correlation matrix
+            pm.HalfCauchy("nu", beta=10)
             step = pm.NUTS()
 
         func = step._logp_dlogp_func
@@ -287,4 +287,4 @@ def test_edge_case(self):
         logp, dlogp = func(q)
         assert logp.size == 1
         assert dlogp.size == 4
-        npt.assert_allclose(dlogp, 0., atol=1e-5)
+        npt.assert_allclose(dlogp, 0.0, atol=1e-5)
diff --git a/pymc3/tests/test_model_func.py b/pymc3/tests/test_model_func.py
index 95441b1b63..9f60885e0d 100644
--- a/pymc3/tests/test_model_func.py
+++ b/pymc3/tests/test_model_func.py
@@ -5,42 +5,43 @@
 from .models import simple_model, mv_simple
 
 
-tol = 2.0**-11
+tol = 2.0 ** -11
+
 
 def test_logp():
     start, model, (mu, sig) = simple_model()
     lp = model.fastlogp
     lp(start)
-    close_to(lp(start), sp.norm.logpdf(start['x'], mu, sig).sum(), tol)
+    close_to(lp(start), sp.norm.logpdf(start["x"], mu, sig).sum(), tol)
 
 
 def test_dlogp():
     start, model, (mu, sig) = simple_model()
     dlogp = model.fastdlogp()
-    close_to(dlogp(start), -(start['x'] - mu) / sig**2, 1. / sig**2 / 100.)
+    close_to(dlogp(start), -(start["x"] - mu) / sig ** 2, 1.0 / sig ** 2 / 100.0)
 
 
 def test_dlogp2():
     start, model, (_, sig) = mv_simple()
     H = np.linalg.inv(sig)
     d2logp = model.fastd2logp()
-    close_to(d2logp(start), H, np.abs(H / 100.))
+    close_to(d2logp(start), H, np.abs(H / 100.0))
 
 
 def test_deterministic():
     with pm.Model() as model:
-        x = pm.Normal('x', 0, 1)
-        y = pm.Deterministic('y', x**2)
+        x = pm.Normal("x", 0, 1)
+        y = pm.Deterministic("y", x ** 2)
 
     assert model.y == y
-    assert model['y'] == y
+    assert model["y"] == y
 
 
 def test_mapping():
     with pm.Model() as model:
-        mu = pm.Normal('mu', 0, 1)
-        sd = pm.Gamma('sd', 1, 1)
-        y = pm.Normal('y', mu, sd, observed=np.array([.1, .5]))
+        mu = pm.Normal("mu", 0, 1)
+        sd = pm.Gamma("sd", 1, 1)
+        y = pm.Normal("y", mu, sd, observed=np.array([0.1, 0.5]))
     lp = model.fastlogp
     lparray = model.logp_array
     point = model.test_point
@@ -50,6 +51,3 @@ def test_mapping():
     randarray = np.random.randn(*parray.shape)
     randpoint = model.bijection.rmap(randarray)
     assert lp(randpoint) == lparray(randarray)
-
-
-
diff --git a/pymc3/tests/test_model_graph.py b/pymc3/tests/test_model_graph.py
index c261410cc9..70531ecb8e 100644
--- a/pymc3/tests/test_model_graph.py
+++ b/pymc3/tests/test_model_graph.py
@@ -9,42 +9,39 @@ def radon_model():
     """Similar in shape to the Radon model"""
     n_homes = 919
     counties = 85
-    uranium = np.random.normal(-.1, 0.4, size=n_homes)
+    uranium = np.random.normal(-0.1, 0.4, size=n_homes)
     xbar = np.random.normal(1, 0.1, size=n_homes)
     floor_measure = np.random.randint(0, 2, size=n_homes)
     log_radon = np.random.normal(1, 1, size=n_homes)
 
     d, r = divmod(919, 85)
-    county = np.hstack((
-        np.tile(np.arange(counties, dtype=int), d),
-        np.arange(r)
-        ))
+    county = np.hstack((np.tile(np.arange(counties, dtype=int), d), np.arange(r)))
     with pm.Model() as model:
-        sigma_a = pm.HalfCauchy('sigma_a', 5)
-        gamma = pm.Normal('gamma', mu=0., sd=1e5, shape=3)
-        mu_a = pm.Deterministic('mu_a', gamma[0] + gamma[1]*uranium + gamma[2]*xbar)
-        eps_a = pm.Normal('eps_a', mu=0, sd=sigma_a, shape=counties)
-        a = pm.Deterministic('a', mu_a + eps_a[county])
-        b = pm.Normal('b', mu=0., sd=1e15)
-        sigma_y = pm.Uniform('sigma_y', lower=0, upper=100)
+        sigma_a = pm.HalfCauchy("sigma_a", 5)
+        gamma = pm.Normal("gamma", mu=0.0, sd=1e5, shape=3)
+        mu_a = pm.Deterministic("mu_a", gamma[0] + gamma[1] * uranium + gamma[2] * xbar)
+        eps_a = pm.Normal("eps_a", mu=0, sd=sigma_a, shape=counties)
+        a = pm.Deterministic("a", mu_a + eps_a[county])
+        b = pm.Normal("b", mu=0.0, sd=1e15)
+        sigma_y = pm.Uniform("sigma_y", lower=0, upper=100)
         y_hat = a + b * floor_measure
-        y_like = pm.Normal('y_like', mu=y_hat, sd=sigma_y, observed=log_radon)
+        y_like = pm.Normal("y_like", mu=y_hat, sd=sigma_y, observed=log_radon)
 
     compute_graph = {
-        'sigma_a': set(),
-        'gamma': set(),
-        'mu_a': {'gamma'},
-        'eps_a': {'sigma_a'},
-        'a': {'mu_a', 'eps_a'},
-        'b': set(),
-        'sigma_y': set(),
-        'y_like': {'a', 'b', 'sigma_y'}
+        "sigma_a": set(),
+        "gamma": set(),
+        "mu_a": {"gamma"},
+        "eps_a": {"sigma_a"},
+        "a": {"mu_a", "eps_a"},
+        "b": set(),
+        "sigma_y": set(),
+        "y_like": {"a", "b", "sigma_y"},
     }
     plates = {
-        (): {'b', 'sigma_a', 'sigma_y'},
-        (3,): {'gamma'},
-        (85,): {'eps_a'},
-        (919,): {'a', 'mu_a', 'y_like'},
+        (): {"b", "sigma_a", "sigma_y"},
+        (3,): {"gamma"},
+        (85,): {"eps_a"},
+        (919,): {"a", "mu_a", "y_like"},
     }
     return model, compute_graph, plates
 
@@ -76,4 +73,3 @@ def test_graphviz(self):
         g = model_to_graphviz(self.model)
         for key in self.compute_graph:
             assert key in g.source
-
diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py
index 2b191bd144..2c1023a4a0 100644
--- a/pymc3/tests/test_model_helpers.py
+++ b/pymc3/tests/test_model_helpers.py
@@ -20,21 +20,20 @@ def test_pandas_to_array(self):
         sparse_input = sps.csr_matrix(np.eye(3))
         dense_input = np.arange(9).reshape((3, 3))
 
-        input_name = 'input_variable'
+        input_name = "input_variable"
         theano_graph_input = tt.as_tensor(dense_input, name=input_name)
 
         pandas_input = pd.DataFrame(dense_input)
 
         # All the even numbers are replaced with NaN
-        missing_pandas_input = pd.DataFrame(np.array([[np.nan, 1, np.nan],
-                                                      [3, np.nan, 5],
-                                                      [np.nan, 7, np.nan]]))
-        masked_array_input = ma.array(dense_input,
-                                      mask=(np.mod(dense_input, 2) == 0))
+        missing_pandas_input = pd.DataFrame(
+            np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]])
+        )
+        masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0))
 
         # Create a generator object. Apparently the generator object needs to
         # yield numpy arrays.
-        square_generator = (np.array([i**2], dtype=int) for i in range(100))
+        square_generator = (np.array([i ** 2], dtype=int) for i in range(100))
 
         # Alias the function to be tested
         func = pm.model.pandas_to_array
@@ -54,8 +53,7 @@ def test_pandas_to_array(self):
         sparse_output = func(sparse_input)
         assert sps.issparse(sparse_output)
         assert sparse_output.shape == sparse_input.shape
-        npt.assert_allclose(sparse_output.toarray(),
-                            sparse_input.toarray())
+        npt.assert_allclose(sparse_output.toarray(), sparse_input.toarray())
 
         # Check function behavior when using masked array inputs and pandas
         # objects with missing data
@@ -89,11 +87,10 @@ def test_as_tensor(self):
         should return a Sparse Theano object.
         """
         # Create the various inputs to the function
-        input_name = 'testing_inputs'
+        input_name = "testing_inputs"
         sparse_input = sps.csr_matrix(np.eye(3))
         dense_input = np.arange(9).reshape((3, 3))
-        masked_array_input = ma.array(dense_input,
-                                      mask=(np.mod(dense_input, 2) == 0))
+        masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0))
 
         # Create a fake model and fake distribution to be used for the test
         fake_model = pm.Model()
@@ -106,18 +103,11 @@ def test_as_tensor(self):
         func = pm.model.as_tensor
 
         # Check function behavior using the various inputs
-        dense_output = func(dense_input,
-                            input_name,
-                            fake_model,
-                            fake_distribution)
-        sparse_output = func(sparse_input,
-                             input_name,
-                             fake_model,
-                             fake_distribution)
-        masked_output = func(masked_array_input,
-                             input_name,
-                             fake_model,
-                             fake_distribution)
+        dense_output = func(dense_input, input_name, fake_model, fake_distribution)
+        sparse_output = func(sparse_input, input_name, fake_model, fake_distribution)
+        masked_output = func(
+            masked_array_input, input_name, fake_model, fake_distribution
+        )
 
         # Ensure that the missing values are appropriately set to None
         for func_output in [dense_output, sparse_output]:
diff --git a/pymc3/tests/test_modelcontext.py b/pymc3/tests/test_modelcontext.py
index d3073daf84..86c7ffcff8 100644
--- a/pymc3/tests/test_modelcontext.py
+++ b/pymc3/tests/test_modelcontext.py
@@ -12,21 +12,24 @@ def test_thread_safety(self):
         that thread A enters the context manager first, then B,
         then A attempts to declare a variable while B is still in the context manager.
         """
-        aInCtxt,bInCtxt,aDone = [threading.Event() for _ in range(3)]
+        aInCtxt, bInCtxt, aDone = [threading.Event() for _ in range(3)]
         modelA = Model()
         modelB = Model()
+
         def make_model_a():
             with modelA:
                 aInCtxt.set()
                 bInCtxt.wait()
-                Normal('a',0,1)
+                Normal("a", 0, 1)
             aDone.set()
+
         def make_model_b():
             aInCtxt.wait()
             with modelB:
                 bInCtxt.set()
                 aDone.wait()
-                Normal('b', 0, 1)
+                Normal("b", 0, 1)
+
         threadA = threading.Thread(target=make_model_a)
         threadB = threading.Thread(target=make_model_b)
         threadA.start()
@@ -38,7 +41,4 @@ def make_model_b():
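-        # - B enters it's model context after A, but before a is declared -> a goes into B
-        # - A leaves it's model context before B attempts to declare b. A's context manager
+        # - B enters its model context after A, but before a is declared -> a goes into B
+        # - A leaves its model context before B attempts to declare b. A's context manager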
         #   takes B from the stack, such that b ends up in model A
-        assert (
-                list(modelA.named_vars),
-                list(modelB.named_vars),
-            ) == (['a'],['b'])
+        assert (list(modelA.named_vars), list(modelB.named_vars)) == (["a"], ["b"])
diff --git a/pymc3/tests/test_models_linear.py b/pymc3/tests/test_models_linear.py
index bc8c0ee0d2..352abaaaf2 100644
--- a/pymc3/tests/test_models_linear.py
+++ b/pymc3/tests/test_models_linear.py
@@ -18,7 +18,7 @@ def setup_class(cls):
         super(TestGLM, cls).setup_class()
         cls.intercept = 1
         cls.slope = 3
-        cls.sd = .05
+        cls.sd = 0.05
         x_linear, cls.y_linear = generate_data(cls.intercept, cls.slope, size=1000)
         cls.y_linear += np.random.normal(size=1000, scale=cls.sd)
         cls.data_linear = dict(x=x_linear, y=cls.y_linear)
@@ -29,86 +29,99 @@ def setup_class(cls):
         cls.data_logistic = dict(x=x_logistic, y=bern_trials)
 
     def test_linear_component(self):
-        vars_to_create = {
-            'sigma',
-            'sigma_interval__',
-            'y_obs',
-            'lm_x0',
-            'lm_Intercept'
-        }
+        vars_to_create = {"sigma", "sigma_interval__", "y_obs", "lm_x0", "lm_Intercept"}
         with Model() as model:
             lm = LinearComponent(
-                self.data_linear['x'],
-                self.data_linear['y'],
-                name='lm'
-            )   # yields lm_x0, lm_Intercept
-            sigma = Uniform('sigma', 0, 20)     # yields sigma_interval__
-            Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear)  # yields y_obs
+                self.data_linear["x"], self.data_linear["y"], name="lm"
+            )  # yields lm_x0, lm_Intercept
+            sigma = Uniform("sigma", 0, 20)  # yields sigma_interval__
+            Normal(
+                "y_obs", mu=lm.y_est, sd=sigma, observed=self.y_linear
+            )  # yields y_obs
             start = find_MAP(vars=[sigma])
             step = Slice(model.vars)
-            trace = sample(500, tune=0, step=step, start=start,
-                           progressbar=False, random_seed=self.random_seed)
+            trace = sample(
+                500,
+                tune=0,
+                step=step,
+                start=start,
+                progressbar=False,
+                random_seed=self.random_seed,
+            )
 
-            assert round(abs(np.mean(trace['lm_Intercept'])-self.intercept), 1) == 0
-            assert round(abs(np.mean(trace['lm_x0'])-self.slope), 1) == 0
-            assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0
+            assert round(abs(np.mean(trace["lm_Intercept"]) - self.intercept), 1) == 0
+            assert round(abs(np.mean(trace["lm_x0"]) - self.slope), 1) == 0
+            assert round(abs(np.mean(trace["sigma"]) - self.sd), 1) == 0
         assert vars_to_create == set(model.named_vars.keys())
 
     def test_linear_component_from_formula(self):
         with Model() as model:
-            lm = LinearComponent.from_formula('y ~ x', self.data_linear)
-            sigma = Uniform('sigma', 0, 20)
-            Normal('y_obs', mu=lm.y_est, sd=sigma, observed=self.y_linear)
+            lm = LinearComponent.from_formula("y ~ x", self.data_linear)
+            sigma = Uniform("sigma", 0, 20)
+            Normal("y_obs", mu=lm.y_est, sd=sigma, observed=self.y_linear)
             start = find_MAP(vars=[sigma])
             step = Slice(model.vars)
-            trace = sample(500, tune=0, step=step, start=start,
-                           progressbar=False,
-                           random_seed=self.random_seed)
+            trace = sample(
+                500,
+                tune=0,
+                step=step,
+                start=start,
+                progressbar=False,
+                random_seed=self.random_seed,
+            )
 
-            assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0
-            assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0
-            assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0
+            assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0
+            assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0
+            assert round(abs(np.mean(trace["sigma"]) - self.sd), 1) == 0
 
     def test_glm(self):
         with Model() as model:
             vars_to_create = {
-                'glm_sd',
-                'glm_sd_log__',
-                'glm_y',
-                'glm_x0',
-                'glm_Intercept'
+                "glm_sd",
+                "glm_sd_log__",
+                "glm_y",
+                "glm_x0",
+                "glm_Intercept",
             }
-            GLM(
-                self.data_linear['x'],
-                self.data_linear['y'],
-                name='glm'
-            )
+            GLM(self.data_linear["x"], self.data_linear["y"], name="glm")
             start = find_MAP()
             step = Slice(model.vars)
-            trace = sample(500, tune=0, step=step, start=start,
-                           progressbar=False, random_seed=self.random_seed)
-            assert round(abs(np.mean(trace['glm_Intercept'])-self.intercept), 1) == 0
-            assert round(abs(np.mean(trace['glm_x0'])-self.slope), 1) == 0
-            assert round(abs(np.mean(trace['glm_sd'])-self.sd), 1) == 0
+            trace = sample(
+                500,
+                tune=0,
+                step=step,
+                start=start,
+                progressbar=False,
+                random_seed=self.random_seed,
+            )
+            assert round(abs(np.mean(trace["glm_Intercept"]) - self.intercept), 1) == 0
+            assert round(abs(np.mean(trace["glm_x0"]) - self.slope), 1) == 0
+            assert round(abs(np.mean(trace["glm_sd"]) - self.sd), 1) == 0
             assert vars_to_create == set(model.named_vars.keys())
 
     def test_glm_from_formula(self):
         with Model() as model:
-            NAME = 'glm'
-            GLM.from_formula('y ~ x', self.data_linear, name=NAME)
+            NAME = "glm"
+            GLM.from_formula("y ~ x", self.data_linear, name=NAME)
             start = find_MAP()
             step = Slice(model.vars)
-            trace = sample(500, tune=0, step=step, start=start,
-                           progressbar=False, random_seed=self.random_seed)
+            trace = sample(
+                500,
+                tune=0,
+                step=step,
+                start=start,
+                progressbar=False,
+                random_seed=self.random_seed,
+            )
 
-            assert round(abs(np.mean(trace['%s_Intercept' % NAME])-self.intercept), 1) == 0
-            assert round(abs(np.mean(trace['%s_x' % NAME])-self.slope), 1) == 0
-            assert round(abs(np.mean(trace['%s_sd' % NAME])-self.sd), 1) == 0
+            assert (
+                round(abs(np.mean(trace["%s_Intercept" % NAME]) - self.intercept), 1)
+                == 0
+            )
+            assert round(abs(np.mean(trace["%s_x" % NAME]) - self.slope), 1) == 0
+            assert round(abs(np.mean(trace["%s_sd" % NAME]) - self.sd), 1) == 0
 
     def test_strange_types(self):
         with Model():
-            with pytest.raises(
-                ValueError):
-                GLM(1,
-                self.data_linear['y'],
-                name='lm')
+            with pytest.raises(ValueError):
+                GLM(1, self.data_linear["y"], name="lm")
diff --git a/pymc3/tests/test_models_utils.py b/pymc3/tests/test_models_utils.py
index d32aea7ec4..7168b0692e 100644
--- a/pymc3/tests/test_models_utils.py
+++ b/pymc3/tests/test_models_utils.py
@@ -11,63 +11,64 @@ def setup_method(self):
 
     def assertMatrixLabels(self, m, l, mt=None, lt=None):
         assert np.all(
-                np.equal(
-                    m.eval(),
-                    mt if mt is not None else self.data.as_matrix()
-                )
-            )
+            np.equal(m.eval(), mt if mt is not None else self.data.as_matrix())
+        )
         assert l == list(lt or self.data.columns)
 
     def test_numpy_init(self):
         m, l = utils.any_to_tensor_and_labels(self.data.as_matrix())
-        self.assertMatrixLabels(m, l, lt=['x0', 'x1'])
-        m, l = utils.any_to_tensor_and_labels(self.data.as_matrix(), labels=['x2', 'x3'])
-        self.assertMatrixLabels(m, l, lt=['x2', 'x3'])
+        self.assertMatrixLabels(m, l, lt=["x0", "x1"])
+        m, l = utils.any_to_tensor_and_labels(
+            self.data.as_matrix(), labels=["x2", "x3"]
+        )
+        self.assertMatrixLabels(m, l, lt=["x2", "x3"])
 
     def test_pandas_init(self):
         m, l = utils.any_to_tensor_and_labels(self.data)
         self.assertMatrixLabels(m, l)
-        m, l = utils.any_to_tensor_and_labels(self.data, labels=['x2', 'x3'])
-        self.assertMatrixLabels(m, l, lt=['x2', 'x3'])
+        m, l = utils.any_to_tensor_and_labels(self.data, labels=["x2", "x3"])
+        self.assertMatrixLabels(m, l, lt=["x2", "x3"])
 
     def test_dict_input(self):
-        m, l = utils.any_to_tensor_and_labels(self.data.to_dict('dict'))
+        m, l = utils.any_to_tensor_and_labels(self.data.to_dict("dict"))
         self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l)
 
-        m, l = utils.any_to_tensor_and_labels(self.data.to_dict('series'))
+        m, l = utils.any_to_tensor_and_labels(self.data.to_dict("series"))
         self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l)
 
-        m, l = utils.any_to_tensor_and_labels(self.data.to_dict('list'))
+        m, l = utils.any_to_tensor_and_labels(self.data.to_dict("list"))
         self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l)
 
-        inp = {k: tt.as_tensor_variable(v) for k, v in self.data.to_dict('series').items()}
+        inp = {
+            k: tt.as_tensor_variable(v) for k, v in self.data.to_dict("series").items()
+        }
         m, l = utils.any_to_tensor_and_labels(inp)
         self.assertMatrixLabels(m, l, mt=self.data.as_matrix(l), lt=l)
 
     def test_list_input(self):
         m, l = utils.any_to_tensor_and_labels(self.data.as_matrix().tolist())
-        self.assertMatrixLabels(m, l, lt=['x0', 'x1'])
-        m, l = utils.any_to_tensor_and_labels(self.data.as_matrix().tolist(), labels=['x2', 'x3'])
-        self.assertMatrixLabels(m, l, lt=['x2', 'x3'])
+        self.assertMatrixLabels(m, l, lt=["x0", "x1"])
+        m, l = utils.any_to_tensor_and_labels(
+            self.data.as_matrix().tolist(), labels=["x2", "x3"]
+        )
+        self.assertMatrixLabels(m, l, lt=["x2", "x3"])
 
     def test_tensor_input(self):
         m, l = utils.any_to_tensor_and_labels(
-            tt.as_tensor_variable(self.data.as_matrix().tolist()),
-            labels=['x0', 'x1']
+            tt.as_tensor_variable(self.data.as_matrix().tolist()), labels=["x0", "x1"]
         )
-        self.assertMatrixLabels(m, l, lt=['x0', 'x1'])
+        self.assertMatrixLabels(m, l, lt=["x0", "x1"])
         m, l = utils.any_to_tensor_and_labels(
-            tt.as_tensor_variable(self.data.as_matrix().tolist()),
-            labels=['x2', 'x3'])
-        self.assertMatrixLabels(m, l, lt=['x2', 'x3'])
+            tt.as_tensor_variable(self.data.as_matrix().tolist()), labels=["x2", "x3"]
+        )
+        self.assertMatrixLabels(m, l, lt=["x2", "x3"])
 
     def test_user_mistakes(self):
         # no labels for tensor variable
-        with pytest.raises(
-            ValueError):
-            utils.any_to_tensor_and_labels(tt.as_tensor_variable(self.data.as_matrix().tolist()))
+        with pytest.raises(ValueError):
+            utils.any_to_tensor_and_labels(
+                tt.as_tensor_variable(self.data.as_matrix().tolist())
+            )
         # len of labels is bad
-        with pytest.raises(
-            ValueError):
-            utils.any_to_tensor_and_labels(self.data.as_matrix().tolist(),
-            labels=['x'])
+        with pytest.raises(ValueError):
+            utils.any_to_tensor_and_labels(self.data.as_matrix().tolist(), labels=["x"])
diff --git a/pymc3/tests/test_ndarray_backend.py b/pymc3/tests/test_ndarray_backend.py
index 12b295551a..7fddc75906 100644
--- a/pymc3/tests/test_ndarray_backend.py
+++ b/pymc3/tests/test_ndarray_backend.py
@@ -6,17 +6,9 @@
 import pytest
 
 
-STATS1 = [{
-    'a': np.float64,
-    'b': np.bool
-}]
+STATS1 = [{"a": np.float64, "b": np.bool}]
 
-STATS2 = [{
-    'a': np.float64
-}, {
-    'a': np.float64,
-    'b': np.int64,
-}]
+STATS2 = [{"a": np.float64}, {"a": np.float64, "b": np.int64}]
 
 
 class TestNDArray0dSampling(bf.SamplingTestCase):
@@ -128,7 +120,7 @@ class TestMultiTrace_add_remove_values(bf.ModelBackendSampledTestCase):
     def test_add_values(self):
         mtrace = self.mtrace
         orig_varnames = list(mtrace.varnames)
-        name = 'new_var'
+        name = "new_var"
         vals = mtrace[orig_varnames[0]]
         mtrace.add_values({name: vals})
         assert len(orig_varnames) == len(mtrace.varnames) - 1
@@ -140,7 +132,6 @@ def test_add_values(self):
 
 
 class TestSqueezeCat(object):
-
     def setup_method(self):
         self.x = np.arange(10)
         self.y = np.arange(10, 20)
@@ -170,13 +161,14 @@ def test_combine_true_squeeze_true(self):
         result = base._squeeze_cat([self.x, self.y], True, True)
         npt.assert_equal(result, expected)
 
+
 class TestSaveLoad(object):
     @staticmethod
     def model():
         with pm.Model() as model:
-            x = pm.Normal('x', 0, 1)
-            y = pm.Normal('y', x, 1, observed=2)
-            z = pm.Normal('z', x + y, 1)
+            x = pm.Normal("x", 0, 1)
+            y = pm.Normal("y", x, 1, observed=2)
+            z = pm.Normal("z", x + y, 1)
         return model
 
     @classmethod
@@ -185,12 +177,12 @@ def setup_class(cls):
             cls.trace = pm.sample()
 
     def test_save_new_model(self, tmpdir_factory):
-        directory = str(tmpdir_factory.mktemp('data'))
+        directory = str(tmpdir_factory.mktemp("data"))
         save_dir = pm.save_trace(self.trace, directory, overwrite=True)
 
         assert save_dir == directory
         with pm.Model() as model:
-            w = pm.Normal('w', 0, 1)
+            w = pm.Normal("w", 0, 1)
             new_trace = pm.sample()
 
         with pytest.raises(OSError):
@@ -200,21 +192,21 @@ def test_save_new_model(self, tmpdir_factory):
         with model:
             new_trace_copy = pm.load_trace(directory)
 
-        assert (new_trace['w'] == new_trace_copy['w']).all()
+        assert (new_trace["w"] == new_trace_copy["w"]).all()
 
     def test_save_and_load(self, tmpdir_factory):
-        directory = str(tmpdir_factory.mktemp('data'))
+        directory = str(tmpdir_factory.mktemp("data"))
         save_dir = pm.save_trace(self.trace, directory, overwrite=True)
 
         assert save_dir == directory
 
         trace2 = pm.load_trace(directory, model=TestSaveLoad.model())
 
-        for var in ('x', 'z'):
+        for var in ("x", "z"):
             assert (self.trace[var] == trace2[var]).all()
 
     def test_sample_posterior_predictive(self, tmpdir_factory):
-        directory = str(tmpdir_factory.mktemp('data'))
+        directory = str(tmpdir_factory.mktemp("data"))
         save_dir = pm.save_trace(self.trace, directory, overwrite=True)
 
         assert save_dir == directory
diff --git a/pymc3/tests/test_parallel_sampling.py b/pymc3/tests/test_parallel_sampling.py
index 515c130a90..eeceb3e23f 100644
--- a/pymc3/tests/test_parallel_sampling.py
+++ b/pymc3/tests/test_parallel_sampling.py
@@ -6,39 +6,39 @@
 import pymc3 as pm
 
 
-@pytest.mark.skipif(sys.version_info < (3,3),
-                    reason="requires python3.3")
+@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3")
 def test_abort():
     with pm.Model() as model:
-        a = pm.Normal('a', shape=1)
-        pm.HalfNormal('b')
+        a = pm.Normal("a", shape=1)
+        pm.HalfNormal("b")
         step1 = pm.NUTS([a])
         step2 = pm.Metropolis([model.b_log__])
 
     step = pm.CompoundStep([step1, step2])
 
-    proc = ps.ProcessAdapter(10, 10, step, chain=3, seed=1,
-                             start={'a': 1., 'b_log__': 2.})
+    proc = ps.ProcessAdapter(
+        10, 10, step, chain=3, seed=1, start={"a": 1.0, "b_log__": 2.0}
+    )
     proc.start()
     proc.write_next()
     proc.abort()
     proc.join()
 
 
-@pytest.mark.skipif(sys.version_info < (3,3),
-                    reason="requires python3.3")
+@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3")
 def test_explicit_sample():
     with pm.Model() as model:
-        a = pm.Normal('a', shape=1)
-        pm.HalfNormal('b')
+        a = pm.Normal("a", shape=1)
+        pm.HalfNormal("b")
         step1 = pm.NUTS([a])
         step2 = pm.Metropolis([model.b_log__])
 
     step = pm.CompoundStep([step1, step2])
 
     start = time.time()
-    proc = ps.ProcessAdapter(10, 10, step, chain=3, seed=1,
-                             start={'a': 1., 'b_log__': 2.})
+    proc = ps.ProcessAdapter(
+        10, 10, step, chain=3, seed=1, start={"a": 1.0, "b_log__": 2.0}
+    )
     proc.start()
     while True:
         proc.write_next()
@@ -52,21 +52,19 @@ def test_explicit_sample():
     print(time.time() - start)
 
 
-@pytest.mark.skipif(sys.version_info < (3,3),
-                    reason="requires python3.3")
+@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3")
 def test_iterator():
     with pm.Model() as model:
-        a = pm.Normal('a', shape=1)
-        pm.HalfNormal('b')
+        a = pm.Normal("a", shape=1)
+        pm.HalfNormal("b")
         step1 = pm.NUTS([a])
         step2 = pm.Metropolis([model.b_log__])
 
     step = pm.CompoundStep([step1, step2])
 
     start = time.time()
-    start = {'a': 1., 'b_log__': 2.}
-    sampler = ps.ParallelSampler(10, 10, 3, 2, [2, 3, 4], [start] * 3,
-                                 step, 0, False)
+    start = {"a": 1.0, "b_log__": 2.0}
+    sampler = ps.ParallelSampler(10, 10, 3, 2, [2, 3, 4], [start] * 3, step, 0, False)
     with sampler:
         for draw in sampler:
             pass
diff --git a/pymc3/tests/test_pickling.py b/pymc3/tests/test_pickling.py
index c3274af14e..5249af834f 100644
--- a/pymc3/tests/test_pickling.py
+++ b/pymc3/tests/test_pickling.py
@@ -9,12 +9,13 @@ def setup_method(self):
 
     def test_model_roundtrip(self):
         m = self.model
-        for proto in range(pickle.HIGHEST_PROTOCOL+1):
+        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
             try:
                 s = pickle.dumps(m, proto)
                 pickle.loads(s)
             except Exception:
                 raise AssertionError(
-                    "Exception while trying roundtrip with pickle protocol %d:\n" % proto +
-                    ''.join(traceback.format_exc())
+                    "Exception while trying roundtrip with pickle protocol %d:\n"
+                    % proto
+                    + "".join(traceback.format_exc())
                 )
diff --git a/pymc3/tests/test_plots.py b/pymc3/tests/test_plots.py
index fa5e9100c3..03baf8a332 100644
--- a/pymc3/tests/test_plots.py
+++ b/pymc3/tests/test_plots.py
@@ -1,12 +1,21 @@
 import matplotlib
-matplotlib.use('Agg', warn=False)  # noqa
+
+matplotlib.use("Agg", warn=False)  # noqa
 
 import numpy as np
 import pymc3 as pm
 from .checks import close_to
 
 from .models import multidimensional_model, simple_categorical
-from ..plots import traceplot, forestplot, autocorrplot, plot_posterior, energyplot, densityplot, pairplot
+from ..plots import (
+    traceplot,
+    forestplot,
+    autocorrplot,
+    plot_posterior,
+    energyplot,
+    densityplot,
+    pairplot,
+)
 from ..plots.utils import make_2d
 from ..step_methods import Slice, Metropolis
 from ..sampling import sample
@@ -30,7 +39,8 @@ def test_plots():
     plot_posterior(trace)
     autocorrplot(trace)
     energyplot(trace)
-    densityplot(trace) 
+    densityplot(trace)
+
 
 def test_energyplot():
     with asmod.build_model():
@@ -38,7 +48,7 @@ def test_energyplot():
 
     energyplot(trace)
     energyplot(trace, shade=0.5, alpha=0)
-    energyplot(trace, kind='hist')
+    energyplot(trace, kind="hist")
 
 
 def test_plots_categorical():
@@ -60,25 +70,27 @@ def test_plots_multidimensional():
         h = np.diag(find_hessian(start))
         step = Metropolis(model.vars, h)
         trace = sample(3000, tune=0, step=step, start=start)
-    
+
     traceplot(trace)
     plot_posterior(trace)
     forestplot(trace)
     densityplot(trace)
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on GPU due to cores=2")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on GPU due to cores=2"
+)
 def test_multichain_plots():
     model = build_disaster_model()
     with model:
         # Run sampler
         step1 = Slice([model.early_mean_log__, model.late_mean_log__])
         step2 = Metropolis([model.switchpoint])
-        start = {'early_mean': 2., 'late_mean': 3., 'switchpoint': 50}
+        start = {"early_mean": 2.0, "late_mean": 3.0, "switchpoint": 50}
         ptrace = sample(1000, tune=0, step=[step1, step2], start=start, cores=2)
 
-    forestplot(ptrace, varnames=['early_mean', 'late_mean'])
-    autocorrplot(ptrace, varnames=['switchpoint'])
+    forestplot(ptrace, varnames=["early_mean", "late_mean"])
+    autocorrplot(ptrace, varnames=["switchpoint"])
     plot_posterior(ptrace)
 
 
@@ -97,7 +109,7 @@ def test_make_2d():
 
 def test_plots_transformed():
     with pm.Model():
-        pm.Uniform('x', 0, 1)
+        pm.Uniform("x", 0, 1)
         step = pm.Metropolis()
         trace = pm.sample(100, tune=0, step=step, chains=1)
 
@@ -106,10 +118,10 @@ def test_plots_transformed():
     assert autocorrplot(trace).shape == (1, 1)
     assert autocorrplot(trace, plot_transformed=True).shape == (2, 1)
     assert plot_posterior(trace).numCols == 1
-    assert plot_posterior(trace, plot_transformed=True).shape == (2, )
+    assert plot_posterior(trace, plot_transformed=True).shape == (2,)
 
     with pm.Model():
-        pm.Uniform('x', 0, 1)
+        pm.Uniform("x", 0, 1)
         step = pm.Metropolis()
         trace = pm.sample(100, tune=0, step=step, chains=2)
 
@@ -118,17 +130,17 @@ def test_plots_transformed():
     assert autocorrplot(trace).shape == (1, 2)
     assert autocorrplot(trace, plot_transformed=True).shape == (2, 2)
     assert plot_posterior(trace).numCols == 1
-    assert plot_posterior(trace, plot_transformed=True).shape == (2, )
+    assert plot_posterior(trace, plot_transformed=True).shape == (2,)
+
 
 def test_pairplot():
     with pm.Model() as model:
-        a = pm.Normal('a', shape=2)
-        c = pm.HalfNormal('c', shape=2)
-        b = pm.Normal('b', a, c, shape=2)
-        d = pm.Normal('d', 100, 1)
+        a = pm.Normal("a", shape=2)
+        c = pm.HalfNormal("c", shape=2)
+        b = pm.Normal("b", a, c, shape=2)
+        d = pm.Normal("d", 100, 1)
         trace = pm.sample(1000)
 
     pairplot(trace)
     pairplot(trace, hexbin=True, plot_transformed=True)
-    pairplot(trace, sub_varnames=['a_0', 'c_0', 'b_1'])
-    
\ No newline at end of file
+    pairplot(trace, sub_varnames=["a_0", "c_0", "b_1"])
diff --git a/pymc3/tests/test_posdef_sym.py b/pymc3/tests/test_posdef_sym.py
index 6ae5c731e2..e2ceb7a5ad 100644
--- a/pymc3/tests/test_posdef_sym.py
+++ b/pymc3/tests/test_posdef_sym.py
@@ -4,12 +4,12 @@
 
 
 def test_posdef_symmetric1():
-    data = np.array([[1., 0], [0, 1]], dtype=theano.config.floatX)
+    data = np.array([[1.0, 0], [0, 1]], dtype=theano.config.floatX)
     assert mv.posdef(data) == 1
 
 
 def test_posdef_symmetric2():
-    data = np.array([[1., 2], [2, 1]], dtype=theano.config.floatX)
+    data = np.array([[1.0, 2], [2, 1]], dtype=theano.config.floatX)
     assert mv.posdef(data) == 0
 
 
@@ -18,13 +18,11 @@ def test_posdef_symmetric3():
 
     Is this correct?
     """
-    data = np.array([[1., 1], [1, 1]], dtype=theano.config.floatX)
+    data = np.array([[1.0, 1], [1, 1]], dtype=theano.config.floatX)
     assert mv.posdef(data) == 0
 
 
 def test_posdef_symmetric4():
-    d = np.array([[1,  .99,  1],
-                  [.99, 1,  .999],
-                  [1,  .999, 1]], theano.config.floatX)
+    d = np.array([[1, 0.99, 1], [0.99, 1, 0.999], [1, 0.999, 1]], theano.config.floatX)
 
     assert mv.posdef(d) == 0
diff --git a/pymc3/tests/test_posteriors.py b/pymc3/tests/test_posteriors.py
index 5d2c6ef356..90ed193226 100644
--- a/pymc3/tests/test_posteriors.py
+++ b/pymc3/tests/test_posteriors.py
@@ -2,7 +2,10 @@
 from . import sampler_fixtures as sf
 import theano
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestNUTSUniform(sf.NutsFixture, sf.UniformFixture):
     n_samples = 10000
     tune = 1000
@@ -34,19 +37,19 @@ class TestSliceUniform(sf.SliceFixture, sf.UniformFixture):
 
 
 class TestNUTSUniform2(TestNUTSUniform):
-    step_args = {'target_accept': 0.95, 'integrator': 'two-stage'}
+    step_args = {"target_accept": 0.95, "integrator": "two-stage"}
 
 
 class TestNUTSUniform3(TestNUTSUniform):
-    step_args = {'target_accept': 0.80, 'integrator': 'two-stage'}
+    step_args = {"target_accept": 0.80, "integrator": "two-stage"}
 
 
 class TestNUTSUniform4(TestNUTSUniform):
-    step_args = {'target_accept': 0.95, 'integrator': 'three-stage'}
+    step_args = {"target_accept": 0.95, "integrator": "three-stage"}
 
 
 class TestNUTSUniform5(TestNUTSUniform):
-    step_args = {'target_accept': 0.80, 'integrator': 'three-stage'}
+    step_args = {"target_accept": 0.80, "integrator": "three-stage"}
 
 
 class TestNUTSNormal(sf.NutsFixture, sf.NormalFixture):
@@ -78,7 +81,7 @@ class TestNUTSStudentT(sf.NutsFixture, sf.StudentTFixture):
     atol = 0.05
 
 
-@pytest.mark.skip('Takes too long to run')
+@pytest.mark.skip("Takes too long to run")
 class TestNUTSNormalLong(sf.NutsFixture, sf.NormalFixture):
     n_samples = 500000
     tune = 5000
diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py
index c8838f0418..a800691674 100644
--- a/pymc3/tests/test_quadpotential.py
+++ b/pymc3/tests/test_quadpotential.py
@@ -38,16 +38,16 @@ def test_equal_diag():
         x = floatX(np.random.randn(5))
         pots = [
             quadpotential.quad_potential(diag, False),
-            quadpotential.quad_potential(1. / diag, True),
+            quadpotential.quad_potential(1.0 / diag, True),
             quadpotential.quad_potential(np.diag(diag), False),
-            quadpotential.quad_potential(np.diag(1. / diag), True),
+            quadpotential.quad_potential(np.diag(1.0 / diag), True),
         ]
         if quadpotential.chol_available:
-            diag_ = scipy.sparse.csc_matrix(np.diag(1. / diag))
+            diag_ = scipy.sparse.csc_matrix(np.diag(1.0 / diag))
             pots.append(quadpotential.quad_potential(diag_, True))
 
-        v = np.diag(1. / diag).dot(x)
-        e = x.dot(np.diag(1. / diag).dot(x)) / 2
+        v = np.diag(1.0 / diag).dot(x)
+        e = x.dot(np.diag(1.0 / diag).dot(x)) / 2
         for pot in pots:
             v_ = pot.velocity(x)
             e_ = pot.energy(x)
@@ -85,9 +85,9 @@ def test_random_diag():
     np.random.seed(42)
     pots = [
         quadpotential.quad_potential(d, True),
-        quadpotential.quad_potential(1./d, False),
+        quadpotential.quad_potential(1.0 / d, False),
         quadpotential.quad_potential(np.diag(d), True),
-        quadpotential.quad_potential(np.diag(1./d), False),
+        quadpotential.quad_potential(np.diag(1.0 / d), False),
     ]
     if quadpotential.chol_available:
         d_ = scipy.sparse.csc_matrix(np.diag(d))
@@ -95,7 +95,7 @@ def test_random_diag():
         pots.append(pot)
     for pot in pots:
         vals = np.array([pot.random() for _ in range(1000)])
-        npt.assert_allclose(vals.std(0), np.sqrt(1./d), atol=0.1)
+        npt.assert_allclose(vals.std(0), np.sqrt(1.0 / d), atol=0.1)
 
 
 def test_random_dense():
diff --git a/pymc3/tests/test_random.py b/pymc3/tests/test_random.py
index f61fe3b4a7..10097e7606 100644
--- a/pymc3/tests/test_random.py
+++ b/pymc3/tests/test_random.py
@@ -10,28 +10,28 @@
 
 def test_draw_value():
     npt.assert_equal(_draw_value(np.array([5, 6])), [5, 6])
-    npt.assert_equal(_draw_value(np.array(5.)), 5)
+    npt.assert_equal(_draw_value(np.array(5.0)), 5)
 
-    npt.assert_equal(_draw_value(tt.constant([5., 6.])), [5, 6])
+    npt.assert_equal(_draw_value(tt.constant([5.0, 6.0])), [5, 6])
     assert _draw_value(tt.constant(5)) == 5
-    npt.assert_equal(_draw_value(2 * tt.constant([5., 6.])), [10, 12])
+    npt.assert_equal(_draw_value(2 * tt.constant([5.0, 6.0])), [10, 12])
 
-    val = theano.shared(np.array([5., 6.]))
+    val = theano.shared(np.array([5.0, 6.0]))
     npt.assert_equal(_draw_value(val), [5, 6])
     npt.assert_equal(_draw_value(2 * val), [10, 12])
 
-    a = tt.scalar('a')
+    a = tt.scalar("a")
     a.tag.test_value = 6
     npt.assert_equal(_draw_value(2 * a, givens=[(a, 1)]), 2)
 
     assert _draw_value(5) == 5
-    assert _draw_value(5.) == 5
-    assert isinstance(_draw_value(5.), type(5.))
+    assert _draw_value(5.0) == 5
+    assert isinstance(_draw_value(5.0), type(5.0))
     assert isinstance(_draw_value(5), type(5))
 
     with pm.Model():
-        mu = 2 * tt.constant(np.array([5., 6.])) + theano.shared(np.array(5))
-        a = pm.Normal('a', mu=mu, sd=5, shape=2)
+        mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5))
+        a = pm.Normal("a", mu=mu, sd=5, shape=2)
 
     val1 = _draw_value(a)
     val2 = _draw_value(a)
@@ -39,7 +39,7 @@ def test_draw_value():
 
     with pytest.raises(ValueError) as err:
         _draw_value([])
-    err.match('Unexpected type')
+    err.match("Unexpected type")
 
 
 class TestDrawValues(object):
@@ -48,43 +48,50 @@ def test_empty(self):
 
     def test_vals(self):
         npt.assert_equal(draw_values([np.array([5, 6])])[0], [5, 6])
-        npt.assert_equal(draw_values([np.array(5.)])[0], 5)
+        npt.assert_equal(draw_values([np.array(5.0)])[0], 5)
 
-        npt.assert_equal(draw_values([tt.constant([5., 6.])])[0], [5, 6])
+        npt.assert_equal(draw_values([tt.constant([5.0, 6.0])])[0], [5, 6])
         assert draw_values([tt.constant(5)])[0] == 5
-        npt.assert_equal(draw_values([2 * tt.constant([5., 6.])])[0], [10, 12])
+        npt.assert_equal(draw_values([2 * tt.constant([5.0, 6.0])])[0], [10, 12])
 
-        val = theano.shared(np.array([5., 6.]))
+        val = theano.shared(np.array([5.0, 6.0]))
         npt.assert_equal(draw_values([val])[0], [5, 6])
         npt.assert_equal(draw_values([2 * val])[0], [10, 12])
 
     def test_simple_model(self):
         with pm.Model():
-            mu = 2 * tt.constant(np.array([5., 6.])) + theano.shared(np.array(5))
-            a = pm.Normal('a', mu=mu, sd=5, shape=2)
+            mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5))
+            a = pm.Normal("a", mu=mu, sd=5, shape=2)
 
         val1 = draw_values([a])
         val2 = draw_values([a])
         assert np.all(val1[0] != val2[0])
 
-        point = {'a': np.array([3., 4.])}
-        npt.assert_equal(draw_values([a], point=point), [point['a']])
+        point = {"a": np.array([3.0, 4.0])}
+        npt.assert_equal(draw_values([a], point=point), [point["a"]])
 
     def test_dep_vars(self):
         with pm.Model():
-            mu = 2 * tt.constant(np.array([5., 6.])) + theano.shared(np.array(5))
-            sd = pm.HalfNormal('sd', shape=2)
+            mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5))
+            sd = pm.HalfNormal("sd", shape=2)
             tau = 1 / sd ** 2
-            a = pm.Normal('a', mu=mu, tau=tau, shape=2)
+            a = pm.Normal("a", mu=mu, tau=tau, shape=2)
 
-        point = {'a': np.array([1., 2.])}
-        npt.assert_equal(draw_values([a], point=point), [point['a']])
+        point = {"a": np.array([1.0, 2.0])}
+        npt.assert_equal(draw_values([a], point=point), [point["a"]])
 
         val1 = draw_values([a])[0]
-        val2 = draw_values([a], point={'sd': np.array([2., 3.])})[0]
-        val3 = draw_values([a], point={'sd_log__': np.array([2., 3.])})[0]
-        val4 = draw_values([a], point={'sd_log__': np.array([2., 3.])})[0]
-        
-        assert all([np.all(val1 != val2), np.all(val1 != val3),
-                    np.all(val1 != val4), np.all(val2 != val3),
-                    np.all(val2 != val4), np.all(val3 != val4)])
+        val2 = draw_values([a], point={"sd": np.array([2.0, 3.0])})[0]
+        val3 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0]
+        val4 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0]
+
+        assert all(
+            [
+                np.all(val1 != val2),
+                np.all(val1 != val3),
+                np.all(val1 != val4),
+                np.all(val2 != val3),
+                np.all(val2 != val4),
+                np.all(val3 != val4),
+            ]
+        )
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 4adb5ed487..cadafbe810 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -18,7 +18,9 @@
 import pytest
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestSample(SeededTest):
     def setup_method(self):
         super(TestSample, self).setup_method()
@@ -43,10 +45,10 @@ def test_parallel_sample_does_not_reuse_seed(self):
                 trace = pm.sample(100, tune=0, cores=cores)
             # numpy thread mentioned race condition.  might as well check none are equal
             for first, second in combinations(range(cores), 2):
-                first_chain = trace.get_values('x', chains=first)
-                second_chain = trace.get_values('x', chains=second)
+                first_chain = trace.get_values("x", chains=first)
+                second_chain = trace.get_values("x", chains=second)
                 assert not (first_chain == second_chain).all()
-            draws.append(trace.get_values('x'))
+            draws.append(trace.get_values("x"))
             random_numbers.append(np.random.random())
 
         # Make sure future random processes aren't effected by this
@@ -58,77 +60,90 @@ def test_sample(self):
         with self.model:
             for cores in test_cores:
                 for steps in [1, 10, 300]:
-                    pm.sample(steps, tune=0, step=self.step, cores=cores,
-                              random_seed=self.random_seed)
+                    pm.sample(
+                        steps,
+                        tune=0,
+                        step=self.step,
+                        cores=cores,
+                        random_seed=self.random_seed,
+                    )
 
     def test_sample_init(self):
         with self.model:
-            for init in ('advi', 'advi_map', 'map', 'nuts'):
-                pm.sample(init=init, tune=0,
-                          n_init=1000, draws=50,
-                          random_seed=self.random_seed)
+            for init in ("advi", "advi_map", "map", "nuts"):
+                pm.sample(
+                    init=init,
+                    tune=0,
+                    n_init=1000,
+                    draws=50,
+                    random_seed=self.random_seed,
+                )
 
     def test_sample_args(self):
         with self.model:
             with pytest.raises(TypeError) as excinfo:
-                pm.sample(50, tune=0, init=None, step_kwargs={'nuts': {'foo': 1}})
+                pm.sample(50, tune=0, init=None, step_kwargs={"nuts": {"foo": 1}})
             assert "'foo'" in str(excinfo.value)
 
             with pytest.raises(ValueError) as excinfo:
-                pm.sample(50, tune=0, init=None, step_kwargs={'foo': {}})
-            assert 'foo' in str(excinfo.value)
+                pm.sample(50, tune=0, init=None, step_kwargs={"foo": {}})
+            assert "foo" in str(excinfo.value)
 
-            pm.sample(10, tune=0, init=None, nuts_kwargs={'target_accept': 0.9})
+            pm.sample(10, tune=0, init=None, nuts_kwargs={"target_accept": 0.9})
 
             with pytest.raises(ValueError) as excinfo:
                 pm.sample(5, tune=0, init=None, step_kwargs={}, nuts_kwargs={})
-            assert 'Specify only one' in str(excinfo.value)
+            assert "Specify only one" in str(excinfo.value)
 
     def test_iter_sample(self):
         with self.model:
-            samps = pm.sampling.iter_sample(draws=5, step=self.step,
-                                            start=self.start, tune=0,
-                                            random_seed=self.random_seed)
+            samps = pm.sampling.iter_sample(
+                draws=5,
+                step=self.step,
+                start=self.start,
+                tune=0,
+                random_seed=self.random_seed,
+            )
             for i, trace in enumerate(samps):
                 assert i == len(trace) - 1, "Trace does not have correct length."
 
     def test_parallel_start(self):
         with self.model:
-            tr = pm.sample(0, tune=5, cores=2,
-                           discard_tuned_samples=False,
-                           start=[{'x': [10, 10]}, {'x': [-10, -10]}],
-                           random_seed=self.random_seed)
-        assert tr.get_values('x', chains=0)[0][0] > 0
-        assert tr.get_values('x', chains=1)[0][0] < 0
+            tr = pm.sample(
+                0,
+                tune=5,
+                cores=2,
+                discard_tuned_samples=False,
+                start=[{"x": [10, 10]}, {"x": [-10, -10]}],
+                random_seed=self.random_seed,
+            )
+        assert tr.get_values("x", chains=0)[0][0] > 0
+        assert tr.get_values("x", chains=1)[0][0] < 0
 
     def test_sample_tune_len(self):
         with self.model:
             trace = pm.sample(draws=100, tune=50, cores=1)
             assert len(trace) == 100
-            trace = pm.sample(draws=100, tune=50, cores=1,
-                              discard_tuned_samples=False)
+            trace = pm.sample(draws=100, tune=50, cores=1, discard_tuned_samples=False)
             assert len(trace) == 150
             trace = pm.sample(draws=100, tune=50, cores=4)
             assert len(trace) == 100
 
     @pytest.mark.parametrize(
-        'start, error', [
+        "start, error",
+        [
             ([1, 2], TypeError),
-            ({'x': 1}, ValueError),
-            ({'x': [1, 2, 3]}, ValueError),
-            ({'x': np.array([[1, 1], [1, 1]])}, ValueError)
-        ]
+            ({"x": 1}, ValueError),
+            ({"x": [1, 2, 3]}, ValueError),
+            ({"x": np.array([[1, 1], [1, 1]])}, ValueError),
+        ],
     )
     def test_sample_start_bad_shape(self, start, error):
         with pytest.raises(error):
             pm.sampling._check_start_shape(self.model, start)
 
     @pytest.mark.parametrize(
-        'start', [
-            {'x': np.array([1, 1])},
-            {'x': [10, 10]},
-            {'x': [-10, -10]},
-        ]
+        "start", [{"x": np.array([1, 1])}, {"x": [10, 10]}, {"x": [-10, -10]}]
     )
     def test_sample_start_good_shape(self, start):
         pm.sampling._check_start_shape(self.model, start)
@@ -136,86 +151,101 @@ def test_sample_start_good_shape(self, start):
 
 def test_empty_model():
     with pm.Model():
-        pm.Normal('a', observed=1)
+        pm.Normal("a", observed=1)
         with pytest.raises(ValueError) as error:
             pm.sample()
-        error.match('any free variables')
+        error.match("any free variables")
 
 
 def test_partial_trace_sample():
     with pm.Model() as model:
-        a = pm.Normal('a', mu=0, sd=1)
-        b = pm.Normal('b', mu=0, sd=1)
+        a = pm.Normal("a", mu=0, sd=1)
+        b = pm.Normal("b", mu=0, sd=1)
         trace = pm.sample(trace=[a])
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestNamedSampling(SeededTest):
     def test_shared_named(self):
-        G_var = shared(value=np.atleast_2d(1.), broadcastable=(True, False),
-                       name="G")
+        G_var = shared(value=np.atleast_2d(1.0), broadcastable=(True, False), name="G")
 
         with pm.Model():
-            theta0 = pm.Normal('theta0', mu=np.atleast_2d(0),
-                               tau=np.atleast_2d(1e20), shape=(1, 1),
-                               testval=np.atleast_2d(0))
-            theta = pm.Normal('theta', mu=tt.dot(G_var, theta0),
-                              tau=np.atleast_2d(1e20), shape=(1, 1))
+            theta0 = pm.Normal(
+                "theta0",
+                mu=np.atleast_2d(0),
+                tau=np.atleast_2d(1e20),
+                shape=(1, 1),
+                testval=np.atleast_2d(0),
+            )
+            theta = pm.Normal(
+                "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
+            )
             res = theta.random()
-            assert np.isclose(res, 0.)
+            assert np.isclose(res, 0.0)
 
     def test_shared_unnamed(self):
-        G_var = shared(value=np.atleast_2d(1.), broadcastable=(True, False))
+        G_var = shared(value=np.atleast_2d(1.0), broadcastable=(True, False))
         with pm.Model():
-            theta0 = pm.Normal('theta0', mu=np.atleast_2d(0),
-                               tau=np.atleast_2d(1e20), shape=(1, 1),
-                               testval=np.atleast_2d(0))
-            theta = pm.Normal('theta', mu=tt.dot(G_var, theta0),
-                              tau=np.atleast_2d(1e20), shape=(1, 1))
+            theta0 = pm.Normal(
+                "theta0",
+                mu=np.atleast_2d(0),
+                tau=np.atleast_2d(1e20),
+                shape=(1, 1),
+                testval=np.atleast_2d(0),
+            )
+            theta = pm.Normal(
+                "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
+            )
             res = theta.random()
-            assert np.isclose(res, 0.)
+            assert np.isclose(res, 0.0)
 
     def test_constant_named(self):
-        G_var = tt.constant(np.atleast_2d(1.), name="G")
+        G_var = tt.constant(np.atleast_2d(1.0), name="G")
         with pm.Model():
-            theta0 = pm.Normal('theta0', mu=np.atleast_2d(0),
-                               tau=np.atleast_2d(1e20), shape=(1, 1),
-                               testval=np.atleast_2d(0))
-            theta = pm.Normal('theta', mu=tt.dot(G_var, theta0),
-                              tau=np.atleast_2d(1e20), shape=(1, 1))
+            theta0 = pm.Normal(
+                "theta0",
+                mu=np.atleast_2d(0),
+                tau=np.atleast_2d(1e20),
+                shape=(1, 1),
+                testval=np.atleast_2d(0),
+            )
+            theta = pm.Normal(
+                "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1)
+            )
 
             res = theta.random()
-            assert np.isclose(res, 0.)
+            assert np.isclose(res, 0.0)
 
 
 class TestChooseBackend(object):
     def test_choose_backend_none(self):
-        with mock.patch('pymc3.sampling.NDArray') as nd:
-            pm.sampling._choose_backend(None, 'chain')
+        with mock.patch("pymc3.sampling.NDArray") as nd:
+            pm.sampling._choose_backend(None, "chain")
         assert nd.called
 
     def test_choose_backend_list_of_variables(self):
-        with mock.patch('pymc3.sampling.NDArray') as nd:
-            pm.sampling._choose_backend(['var1', 'var2'], 'chain')
-        nd.assert_called_with(vars=['var1', 'var2'])
+        with mock.patch("pymc3.sampling.NDArray") as nd:
+            pm.sampling._choose_backend(["var1", "var2"], "chain")
+        nd.assert_called_with(vars=["var1", "var2"])
 
     def test_choose_backend_invalid(self):
         with pytest.raises(ValueError):
-            pm.sampling._choose_backend('invalid', 'chain')
+            pm.sampling._choose_backend("invalid", "chain")
 
     def test_choose_backend_shortcut(self):
         backend = mock.Mock()
-        shortcuts = {'test_backend': {'backend': backend,
-                                      'name': None}}
-        pm.sampling._choose_backend('test_backend', 'chain', shortcuts=shortcuts)
+        shortcuts = {"test_backend": {"backend": backend, "name": None}}
+        pm.sampling._choose_backend("test_backend", "chain", shortcuts=shortcuts)
         assert backend.called
 
 
 class TestSamplePPC(SeededTest):
     def test_normal_scalar(self):
         with pm.Model() as model:
-            mu = pm.Normal('mu', 0., 1.)
-            a = pm.Normal('a', mu=mu, sd=1, observed=0.)
+            mu = pm.Normal("mu", 0.0, 1.0)
+            a = pm.Normal("a", mu=mu, sd=1, observed=0.0)
             trace = pm.sample()
 
         with model:
@@ -224,21 +254,19 @@ def test_normal_scalar(self):
             ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[])
             assert len(ppc) == 0
             ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[a])
-            assert 'a' in ppc
-            assert ppc['a'].shape == (1000,)
-        _, pval = stats.kstest(ppc['a'],
-                               stats.norm(loc=0, scale=np.sqrt(2)).cdf)
+            assert "a" in ppc
+            assert ppc["a"].shape == (1000,)
+        _, pval = stats.kstest(ppc["a"], stats.norm(loc=0, scale=np.sqrt(2)).cdf)
         assert pval > 0.001
 
         with model:
             ppc = pm.sample_posterior_predictive(trace, samples=10, size=5, vars=[a])
-            assert ppc['a'].shape == (10, 5)
+            assert ppc["a"].shape == (10, 5)
 
     def test_normal_vector(self):
         with pm.Model() as model:
-            mu = pm.Normal('mu', 0., 1.)
-            a = pm.Normal('a', mu=mu, sd=1,
-                          observed=np.array([.5, .2]))
+            mu = pm.Normal("mu", 0.0, 1.0)
+            a = pm.Normal("a", mu=mu, sd=1, observed=np.array([0.5, 0.2]))
             trace = pm.sample()
 
         with model:
@@ -247,18 +275,17 @@ def test_normal_vector(self):
             ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[])
             assert len(ppc) == 0
             ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a])
-            assert 'a' in ppc
-            assert ppc['a'].shape == (10, 2)
+            assert "a" in ppc
+            assert ppc["a"].shape == (10, 2)
 
             ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a], size=4)
-            assert 'a' in ppc
-            assert ppc['a'].shape == (10, 4, 2)
+            assert "a" in ppc
+            assert ppc["a"].shape == (10, 4, 2)
 
     def test_vector_observed(self):
         with pm.Model() as model:
-            mu = pm.Normal('mu', mu=0, sd=1)
-            a = pm.Normal('a', mu=mu, sd=1,
-                          observed=np.array([0., 1.]))
+            mu = pm.Normal("mu", mu=0, sd=1)
+            a = pm.Normal("a", mu=mu, sd=1, observed=np.array([0.0, 1.0]))
             trace = pm.sample()
 
         with model:
@@ -267,17 +294,17 @@ def test_vector_observed(self):
             ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[])
             assert len(ppc) == 0
             ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a])
-            assert 'a' in ppc
-            assert ppc['a'].shape == (10, 2)
+            assert "a" in ppc
+            assert ppc["a"].shape == (10, 2)
 
             ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a], size=4)
-            assert 'a' in ppc
-            assert ppc['a'].shape == (10, 4, 2)
+            assert "a" in ppc
+            assert ppc["a"].shape == (10, 4, 2)
 
     def test_sum_normal(self):
         with pm.Model() as model:
-            a = pm.Normal('a', sd=0.2)
-            b = pm.Normal('b', mu=a)
+            a = pm.Normal("a", sd=0.2)
+            b = pm.Normal("b", mu=a)
             trace = pm.sample()
 
         with model:
@@ -285,9 +312,9 @@ def test_sum_normal(self):
             ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
             ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[b])
             assert len(ppc) == 1
-            assert ppc['b'].shape == (1000,)
+            assert ppc["b"].shape == (1000,)
             scale = np.sqrt(1 + 0.2 ** 2)
-            _, pval = stats.kstest(ppc['b'], stats.norm(scale=scale).cdf)
+            _, pval = stats.kstest(ppc["b"], stats.norm(scale=scale).cdf)
             assert pval > 0.001
 
 
@@ -296,85 +323,97 @@ def test_sample_posterior_predictive_w(self):
         data0 = np.random.normal(0, 1, size=500)
 
         with pm.Model() as model_0:
-            mu = pm.Normal('mu', mu=0, sd=1)
-            y = pm.Normal('y', mu=mu, sd=1, observed=data0)
+            mu = pm.Normal("mu", mu=0, sd=1)
+            y = pm.Normal("y", mu=mu, sd=1, observed=data0)
             trace_0 = pm.sample()
 
         with pm.Model() as model_1:
-            mu = pm.Normal('mu', mu=0, sd=1, shape=len(data0))
-            y = pm.Normal('y', mu=mu, sd=1, observed=data0)
+            mu = pm.Normal("mu", mu=0, sd=1, shape=len(data0))
+            y = pm.Normal("y", mu=mu, sd=1, observed=data0)
             trace_1 = pm.sample()
 
         traces = [trace_0, trace_0]
         models = [model_0, model_0]
         ppc = pm.sample_posterior_predictive_w(traces, 100, models)
-        assert ppc['y'].shape == (100, 500)
+        assert ppc["y"].shape == (100, 500)
 
         traces = [trace_0, trace_1]
         models = [model_0, model_1]
         ppc = pm.sample_posterior_predictive_w(traces, 100, models)
-        assert ppc['y'].shape == (100, 500)
-
-
-@pytest.mark.parametrize('method', [
-    'jitter+adapt_diag', 'adapt_diag', 'advi', 'ADVI+adapt_diag',
-    'advi+adapt_diag_grad', 'map', 'advi_map', 'nuts'
-])
+        assert ppc["y"].shape == (100, 500)
+
+
+@pytest.mark.parametrize(
+    "method",
+    [
+        "jitter+adapt_diag",
+        "adapt_diag",
+        "advi",
+        "ADVI+adapt_diag",
+        "advi+adapt_diag_grad",
+        "map",
+        "advi_map",
+        "nuts",
+    ],
+)
 def test_exec_nuts_init(method):
     with pm.Model() as model:
-        pm.Normal('a', mu=0, sd=1, shape=2)
-        pm.HalfNormal('b', sd=1)
+        pm.Normal("a", mu=0, sd=1, shape=2)
+        pm.HalfNormal("b", sd=1)
     with model:
         start, _ = pm.init_nuts(init=method, n_init=10)
         assert isinstance(start, list)
         assert len(start) == 1
         assert isinstance(start[0], dict)
-        assert 'a' in start[0] and 'b_log__' in start[0]
+        assert "a" in start[0] and "b_log__" in start[0]
         start, _ = pm.init_nuts(init=method, n_init=10, chains=2)
         assert isinstance(start, list)
         assert len(start) == 2
         assert isinstance(start[0], dict)
-        assert 'a' in start[0] and 'b_log__' in start[0]
+        assert "a" in start[0] and "b_log__" in start[0]
+
 
 class TestSamplePriorPredictive(SeededTest):
     def test_ignores_observed(self):
         observed = np.random.normal(10, 1, size=200)
         with pm.Model():
             # Use a prior that's way off to show we're ignoring the observed variables
-            mu = pm.Normal('mu', mu=-100, sd=1)
-            positive_mu = pm.Deterministic('positive_mu', np.abs(mu))
+            mu = pm.Normal("mu", mu=-100, sd=1)
+            positive_mu = pm.Deterministic("positive_mu", np.abs(mu))
             z = -1 - positive_mu
-            pm.Normal('x_obs', mu=z, sd=1, observed=observed)
+            pm.Normal("x_obs", mu=z, sd=1, observed=observed)
             prior = pm.sample_prior_predictive()
 
-        assert (prior['mu'] < 90).all()
-        assert (prior['positive_mu'] > 90).all()
-        assert (prior['x_obs'] < 90).all()
-        assert prior['x_obs'].shape == (500, 200)
-        npt.assert_array_almost_equal(prior['positive_mu'], np.abs(prior['mu']), decimal=4)
+        assert (prior["mu"] < 90).all()
+        assert (prior["positive_mu"] > 90).all()
+        assert (prior["x_obs"] < 90).all()
+        assert prior["x_obs"].shape == (500, 200)
+        npt.assert_array_almost_equal(
+            prior["positive_mu"], np.abs(prior["mu"]), decimal=4
+        )
 
     def test_respects_shape(self):
         for shape in (2, (2,), (10, 2), (10, 10)):
             with pm.Model():
-                mu = pm.Gamma('mu', 3, 1, shape=1)
-                goals = pm.Poisson('goals', mu, shape=shape)
+                mu = pm.Gamma("mu", 3, 1, shape=1)
+                goals = pm.Poisson("goals", mu, shape=shape)
                 trace = pm.sample_prior_predictive(10)
             if shape == 2:  # want to test shape as an int
                 shape = (2,)
-            assert trace['goals'].shape == (10,) + shape
+            assert trace["goals"].shape == (10,) + shape
 
     def test_multivariate(self):
         with pm.Model():
-            m = pm.Multinomial('m', n=5, p=np.array([0.25, 0.25, 0.25, 0.25]), shape=4)
+            m = pm.Multinomial("m", n=5, p=np.array([0.25, 0.25, 0.25, 0.25]), shape=4)
             trace = pm.sample_prior_predictive(10)
 
         assert m.random(size=10).shape == (10, 4)
-        assert trace['m'].shape == (10, 4)
+        assert trace["m"].shape == (10, 4)
 
     def test_layers(self):
         with pm.Model() as model:
-            a = pm.Uniform('a', lower=0, upper=1, shape=10)
-            b = pm.Binomial('b', n=1, p=a, shape=10)
+            a = pm.Uniform("a", lower=0, upper=1, shape=10)
+            b = pm.Binomial("b", n=1, p=a, shape=10)
 
         avg = b.random(size=10000).mean(axis=0)
         npt.assert_array_almost_equal(avg, 0.5 * np.ones_like(b), decimal=2)
@@ -386,54 +425,61 @@ def test_transformed(self):
         draws = 50
 
         with pm.Model() as model:
-            phi = pm.Beta('phi', alpha=1., beta=1.)
+            phi = pm.Beta("phi", alpha=1.0, beta=1.0)
 
-            kappa_log = pm.Exponential('logkappa', lam=5.)
-            kappa = pm.Deterministic('kappa', tt.exp(kappa_log))
+            kappa_log = pm.Exponential("logkappa", lam=5.0)
+            kappa = pm.Deterministic("kappa", tt.exp(kappa_log))
 
-            thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=n)
+            thetas = pm.Beta(
+                "thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=n
+            )
 
-            y = pm.Binomial('y', n=at_bats, p=thetas, observed=hits)
+            y = pm.Binomial("y", n=at_bats, p=thetas, observed=hits)
             gen = pm.sample_prior_predictive(draws)
 
-        assert gen['phi'].shape == (draws,)
-        assert gen['y'].shape == (draws, n)
-        assert 'thetas_logodds__' in gen
+        assert gen["phi"].shape == (draws,)
+        assert gen["y"].shape == (draws, n)
+        assert "thetas_logodds__" in gen
 
     def test_shared(self):
         n1 = 10
-        obs = shared(np.random.rand(n1) < .5)
+        obs = shared(np.random.rand(n1) < 0.5)
         draws = 50
 
         with pm.Model() as m:
-            p = pm.Beta('p', 1., 1.)
-            y = pm.Bernoulli('y', p, observed=obs)
+            p = pm.Beta("p", 1.0, 1.0)
+            y = pm.Bernoulli("y", p, observed=obs)
             gen1 = pm.sample_prior_predictive(draws)
 
-        assert gen1['y'].shape == (draws, n1)
+        assert gen1["y"].shape == (draws, n1)
 
         n2 = 20
-        obs.set_value(np.random.rand(n2) < .5)
+        obs.set_value(np.random.rand(n2) < 0.5)
         with m:
             gen2 = pm.sample_prior_predictive(draws)
 
-        assert gen2['y'].shape == (draws, n2)
+        assert gen2["y"].shape == (draws, n2)
 
     def test_density_dist(self):
 
         obs = np.random.normal(-1, 0.1, size=10)
         with pm.Model():
-            mu = pm.Normal('mu', 0, 1)
-            sd = pm.Gamma('sd', 1, 2)
-            a = pm.DensityDist('a', pm.Normal.dist(mu, sd).logp, random=pm.Normal.dist(mu, sd).random, observed=obs)
+            mu = pm.Normal("mu", 0, 1)
+            sd = pm.Gamma("sd", 1, 2)
+            a = pm.DensityDist(
+                "a",
+                pm.Normal.dist(mu, sd).logp,
+                random=pm.Normal.dist(mu, sd).random,
+                observed=obs,
+            )
             prior = pm.sample_prior_predictive()
 
-        npt.assert_almost_equal(prior['a'].mean(), 0, decimal=1)
+        npt.assert_almost_equal(prior["a"].mean(), 0, decimal=1)
 
     def test_shape_edgecase(self):
         with pm.Model():
-            mu = pm.Normal('mu', shape=5)
-            sd = pm.Uniform('sd', lower=2, upper=3)
-            x = pm.Normal('x', mu=mu, sd=sd, shape=5)
+            mu = pm.Normal("mu", shape=5)
+            sd = pm.Uniform("sd", lower=2, upper=3)
+            x = pm.Normal("x", mu=mu, sd=sd, shape=5)
             prior = pm.sample_prior_predictive(10)
-        assert prior['mu'].shape == (10, 5)
+        assert prior["mu"].shape == (10, 5)
diff --git a/pymc3/tests/test_sgfs.py b/pymc3/tests/test_sgfs.py
index 06d8749443..421292bf40 100644
--- a/pymc3/tests/test_sgfs.py
+++ b/pymc3/tests/test_sgfs.py
@@ -3,34 +3,41 @@
 from pymc3 import Model, Normal
 import theano.tensor as tt
 
+
 def test_minibatch():
     draws = 3000
     mu0 = 1
     sd0 = 1
-    
+
     def f(x, a, b, c):
-        return a*x**2 + b*x + c
-    
+        return a * x ** 2 + b * x + c
+
     a, b, c = 1, 2, 3
 
     batch_size = 50
-    total_size = batch_size*500
-    x_train = np.random.uniform(-10, 10, size=(total_size,)).astype('float32')
+    total_size = batch_size * 500
+    x_train = np.random.uniform(-10, 10, size=(total_size,)).astype("float32")
     x_obs = pm.data.Minibatch(x_train, batch_size=batch_size)
 
-    y_train = f(x_train, a, b, c) + np.random.normal(size=x_train.shape).astype('float32')
+    y_train = f(x_train, a, b, c) + np.random.normal(size=x_train.shape).astype(
+        "float32"
+    )
     y_obs = pm.data.Minibatch(y_train, batch_size=batch_size)
 
     with Model():
-        abc = Normal('abc', mu=mu0, sd=sd0, shape=(3,))
+        abc = Normal("abc", mu=mu0, sd=sd0, shape=(3,))
         x = x_obs
-        x2 = x**2
+        x2 = x ** 2
         o = tt.ones_like(x)
         X = tt.stack([x2, x, o]).T
         y = X.dot(abc)
-        pm.Normal('y', mu=y, observed=y_obs)
+        pm.Normal("y", mu=y, observed=y_obs)
 
-        step_method = pm.SGFS(batch_size=batch_size, step_size=1., total_size=total_size)
+        step_method = pm.SGFS(
+            batch_size=batch_size, step_size=1.0, total_size=total_size
+        )
         trace = pm.sample(draws=draws, step=step_method, init=None, cores=2)
 
-    np.testing.assert_allclose(np.mean(trace['abc'], axis=0), np.asarray([a, b, c]), rtol=0.1)
+    np.testing.assert_allclose(
+        np.mean(trace["abc"], axis=0), np.asarray([a, b, c]), rtol=0.1
+    )
diff --git a/pymc3/tests/test_shared.py b/pymc3/tests/test_shared.py
index 6106c1c68b..e4b1b3b2c9 100644
--- a/pymc3/tests/test_shared.py
+++ b/pymc3/tests/test_shared.py
@@ -7,9 +7,11 @@
 class TestShared(SeededTest):
     def test_deterministic(self):
         with pm.Model() as model:
-            data_values = np.array([.5, .4, 5, 2])
-            X = theano.shared(np.asarray(data_values, dtype=theano.config.floatX), borrow=True)
-            pm.Normal('y', 0, 1, observed=X)
+            data_values = np.array([0.5, 0.4, 5, 2])
+            X = theano.shared(
+                np.asarray(data_values, dtype=theano.config.floatX), borrow=True
+            )
+            pm.Normal("y", 0, 1, observed=X)
             model.logp(model.test_point)
 
     def test_sample(self):
@@ -21,8 +23,8 @@ def test_sample(self):
         x_shared = theano.shared(x)
 
         with pm.Model() as model:
-            b = pm.Normal('b', 0., 10.)
-            pm.Normal('obs', b * x_shared, np.sqrt(1e-2), observed=y)
+            b = pm.Normal("b", 0.0, 10.0)
+            pm.Normal("obs", b * x_shared, np.sqrt(1e-2), observed=y)
             prior_trace0 = pm.sample_prior_predictive(1000)
 
             trace = pm.sample(1000, init=None, progressbar=False)
@@ -32,10 +34,10 @@ def test_sample(self):
             prior_trace1 = pm.sample_prior_predictive(1000)
             pp_trace1 = pm.sample_posterior_predictive(trace, 1000)
 
-        assert prior_trace0['b'].shape == (1000,)
-        assert prior_trace0['obs'].shape == (1000, 100)
-        np.testing.assert_allclose(x, pp_trace0['obs'].mean(axis=0), atol=1e-1)
+        assert prior_trace0["b"].shape == (1000,)
+        assert prior_trace0["obs"].shape == (1000, 100)
+        np.testing.assert_allclose(x, pp_trace0["obs"].mean(axis=0), atol=1e-1)
 
-        assert prior_trace1['b'].shape == (1000,)
-        assert prior_trace1['obs'].shape == (1000, 200)
-        np.testing.assert_allclose(x_pred, pp_trace1['obs'].mean(axis=0), atol=1e-1)
+        assert prior_trace1["b"].shape == (1000,)
+        assert prior_trace1["obs"].shape == (1000, 200)
+        np.testing.assert_allclose(x_pred, pp_trace1["obs"].mean(axis=0), atol=1e-1)
diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py
index 90732f8645..987ea8fea6 100644
--- a/pymc3/tests/test_smc.py
+++ b/pymc3/tests/test_smc.py
@@ -6,13 +6,12 @@
 
 
 class TestSMC(SeededTest):
-
     def setup_class(self):
         super(TestSMC, self).setup_class()
         self.samples = 1000
         n = 4
-        mu1 = np.ones(n) * (1. / 2)
-        mu2 = - mu1
+        mu1 = np.ones(n) * (1.0 / 2)
+        mu2 = -mu1
 
         stdev = 0.1
         sigma = np.power(stdev, 2) * np.eye(n)
@@ -20,46 +19,46 @@ def setup_class(self):
         dsigma = np.linalg.det(sigma)
 
         w1 = stdev
-        w2 = (1 - stdev)
+        w2 = 1 - stdev
 
         def two_gaussians(x):
-            log_like1 = - 0.5 * n * tt.log(2 * np.pi) \
-                        - 0.5 * tt.log(dsigma) \
-                        - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
-            log_like2 = - 0.5 * n * tt.log(2 * np.pi) \
-                        - 0.5 * tt.log(dsigma) \
-                        - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
+            log_like1 = (
+                -0.5 * n * tt.log(2 * np.pi)
+                - 0.5 * tt.log(dsigma)
+                - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
+            )
+            log_like2 = (
+                -0.5 * n * tt.log(2 * np.pi)
+                - 0.5 * tt.log(dsigma)
+                - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
+            )
             return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))
 
         with pm.Model() as self.SMC_test:
-            X = pm.Uniform('X', lower=-2, upper=2., shape=n)
-            llk = pm.Potential('muh', two_gaussians(X))
+            X = pm.Uniform("X", lower=-2, upper=2.0, shape=n)
+            llk = pm.Potential("muh", two_gaussians(X))
 
         self.muref = mu1
 
-
     def test_sample(self):
         with self.SMC_test:
-            mtrace = pm.sample(draws=self.samples,
-                               step = pm.SMC())
+            mtrace = pm.sample(draws=self.samples, step=pm.SMC())
 
-        x = mtrace['X']
+        x = mtrace["X"]
         mu1d = np.abs(x).mean(axis=0)
-        np.testing.assert_allclose(self.muref, mu1d, rtol=0., atol=0.03)
+        np.testing.assert_allclose(self.muref, mu1d, rtol=0.0, atol=0.03)
 
     def test_ml(self):
         data = np.repeat([1, 0], [50, 50])
         marginals = []
-        a_prior_0, b_prior_0 = 1., 1.
-        a_prior_1, b_prior_1 = 20., 20.
+        a_prior_0, b_prior_0 = 1.0, 1.0
+        a_prior_1, b_prior_1 = 20.0, 20.0
 
         for alpha, beta in ((a_prior_0, b_prior_0), (a_prior_1, b_prior_1)):
             with pm.Model() as model:
-                a = pm.Beta('a', alpha, beta)
-                y = pm.Bernoulli('y', a, observed=data)
+                a = pm.Beta("a", alpha, beta)
+                y = pm.Bernoulli("y", a, observed=data)
                 trace = pm.sample(2000, step=pm.SMC())
                 marginals.append(model.marginal_likelihood)
         # compare to the analytical result
         assert abs((marginals[1] / marginals[0]) - 4.0) <= 1
-
-
diff --git a/pymc3/tests/test_special_functions.py b/pymc3/tests/test_special_functions.py
index c02f3fba0c..16aedbe9c9 100644
--- a/pymc3/tests/test_special_functions.py
+++ b/pymc3/tests/test_special_functions.py
@@ -9,12 +9,12 @@
 
 
 def test_functions():
-    xvals = list(map(np.atleast_1d, [.01, .1, 2, 100, 10000]))
+    xvals = list(map(np.atleast_1d, [0.01, 0.1, 2, 100, 10000]))
 
-    x = tt.dvector('x')
+    x = tt.dvector("x")
     x.tag.test_value = xvals[0]
 
-    p = tt.iscalar('p')
+    p = tt.iscalar("p")
     p.tag.test_value = 1
 
     gammaln = function([x], ps.gammaln(x))
@@ -25,6 +25,7 @@ def test_functions():
     for x in xvals[1:]:
         check_vals(psi, ss.psi, x)
 
+
 """
 scipy.special.multigammaln gives bad values if you pass a non scalar to a
 In [14]:
@@ -37,12 +38,12 @@ def test_functions():
 
 
 def t_multigamma():
-    xvals = list(map(np.atleast_1d, [0, .1, 2, 100]))
+    xvals = list(map(np.atleast_1d, [0, 0.1, 2, 100]))
 
-    x = tt.dvector('x')
+    x = tt.dvector("x")
     x.tag.test_value = xvals[0]
 
-    p = tt.iscalar('p')
+    p = tt.iscalar("p")
     p.tag.test_value = 1
 
     multigammaln = function([x, p], ps.multigammaln(x, p))
diff --git a/pymc3/tests/test_sqlite_backend.py b/pymc3/tests/test_sqlite_backend.py
index 5106e5af19..b805b6a271 100644
--- a/pymc3/tests/test_sqlite_backend.py
+++ b/pymc3/tests/test_sqlite_backend.py
@@ -5,52 +5,70 @@
 import pytest
 import theano
 
-DBNAME = os.path.join(tempfile.gettempdir(), 'test.db')
+DBNAME = os.path.join(tempfile.gettempdir(), "test.db")
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"),
+    reason="Fails on float32 due to inf issues",
+)
 class TestSQlite0dSampling(bf.SamplingTestCase):
     backend = sqlite.SQLite
     name = DBNAME
     shape = ()
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestSQlite1dSampling(bf.SamplingTestCase):
     backend = sqlite.SQLite
     name = DBNAME
     shape = 2
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"),
+    reason="Fails on float32 due to inf issues",
+)
 class TestSQlite2dSampling(bf.SamplingTestCase):
     backend = sqlite.SQLite
     name = DBNAME
     shape = (2, 3)
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"),
+    reason="Fails on float32 due to inf issues",
+)
 class TestSQLite0dSelection(bf.SelectionTestCase):
     backend = sqlite.SQLite
     name = DBNAME
     shape = ()
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestSQLite1dSelection(bf.SelectionTestCase):
     backend = sqlite.SQLite
     name = DBNAME
     shape = 2
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestSQLite2dSelection(bf.SelectionTestCase):
     backend = sqlite.SQLite
     name = DBNAME
     shape = (2, 3)
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"),
+    reason="Fails on float32 due to inf issues",
+)
 class TestSQLiteDumpLoad(bf.DumpLoadTestCase):
     backend = sqlite.SQLite
     load_func = staticmethod(sqlite.load)
@@ -58,7 +76,10 @@ class TestSQLiteDumpLoad(bf.DumpLoadTestCase):
     shape = (2, 3)
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"),
+    reason="Fails on float32 due to inf issues",
+)
 class TestNDArraySqliteEquality(bf.BackendEqualityTestCase):
     backend0 = ndarray.NDArray
     name0 = None
diff --git a/pymc3/tests/test_starting.py b/pymc3/tests/test_starting.py
index 44819f40bb..c25b6aab8e 100644
--- a/pymc3/tests/test_starting.py
+++ b/pymc3/tests/test_starting.py
@@ -10,35 +10,37 @@ def test_accuracy_normal():
     _, model, (mu, _) = simple_model()
     with model:
         newstart = find_MAP(Point(x=[-10.5, 100.5]))
-        close_to(newstart['x'], [mu, mu], select_by_precision(float64=1e-5, float32=1E-4))
+        close_to(
+            newstart["x"], [mu, mu], select_by_precision(float64=1e-5, float32=1e-4)
+        )
 
 
 def test_accuracy_non_normal():
     _, model, (mu, _) = non_normal(4)
     with model:
-        newstart = find_MAP(Point(x=[.5, .01, .95, .99]))
-        close_to(newstart['x'], mu, select_by_precision(float64=1e-5, float32=1E-4))
+        newstart = find_MAP(Point(x=[0.5, 0.01, 0.95, 0.99]))
+        close_to(newstart["x"], mu, select_by_precision(float64=1e-5, float32=1e-4))
 
 
 def test_find_MAP_discrete():
-    tol = 2.0**-11
+    tol = 2.0 ** -11
     alpha = 4
     beta = 4
     n = 20
     yes = 15
 
     with Model() as model:
-        p = Beta('p', alpha, beta)
-        Binomial('ss', n=n, p=p)
-        Binomial('s', n=n, p=p, observed=yes)
+        p = Beta("p", alpha, beta)
+        Binomial("ss", n=n, p=p)
+        Binomial("s", n=n, p=p, observed=yes)
 
         map_est1 = starting.find_MAP()
         map_est2 = starting.find_MAP(vars=model.vars)
 
-    close_to(map_est1['p'], 0.6086956533498806, tol)
+    close_to(map_est1["p"], 0.6086956533498806, tol)
 
-    close_to(map_est2['p'], 0.695642178810167, tol)
-    assert map_est2['ss'] == 14
+    close_to(map_est2["p"], 0.695642178810167, tol)
+    assert map_est2["ss"] == 14
 
 
 def test_find_MAP_no_gradient():
@@ -48,24 +50,24 @@ def test_find_MAP_no_gradient():
 
 
 def test_find_MAP():
-    tol = 2.0**-11  # 16 bit machine epsilon, a low bar
+    tol = 2.0 ** -11  # 16 bit machine epsilon, a low bar
     data = np.random.randn(100)
     # data should be roughly mean 0, std 1, but let's
     # normalize anyway to get it really close
     data = (data - np.mean(data)) / np.std(data)
 
     with Model():
-        mu = Uniform('mu', -1, 1)
-        sigma = Uniform('sigma', .5, 1.5)
-        Normal('y', mu=mu, tau=sigma**-2, observed=data)
+        mu = Uniform("mu", -1, 1)
+        sigma = Uniform("sigma", 0.5, 1.5)
+        Normal("y", mu=mu, tau=sigma ** -2, observed=data)
 
         # Test gradient minimization
         map_est1 = starting.find_MAP(progressbar=False)
         # Test non-gradient minimization
         map_est2 = starting.find_MAP(progressbar=False, method="Powell")
 
-    close_to(map_est1['mu'], 0, tol)
-    close_to(map_est1['sigma'], 1, tol)
+    close_to(map_est1["mu"], 0, tol)
+    close_to(map_est1["sigma"], 1, tol)
 
-    close_to(map_est2['mu'], 0, tol)
-    close_to(map_est2['sigma'], 1, tol)
+    close_to(map_est2["mu"], 0, tol)
+    close_to(map_est2["sigma"], 1, tol)
diff --git a/pymc3/tests/test_stats.py b/pymc3/tests/test_stats.py
index 3a2e81e783..0206efe2a6 100644
--- a/pymc3/tests/test_stats.py
+++ b/pymc3/tests/test_stats.py
@@ -6,8 +6,17 @@
 from .helpers import SeededTest
 from ..tests import backend_fixtures as bf
 from ..backends import ndarray
-from ..stats import (summary, autocorr, autocov, hpd, mc_error, quantiles,
-                     make_indices, bfmi, r2_score)
+from ..stats import (
+    summary,
+    autocorr,
+    autocov,
+    hpd,
+    mc_error,
+    quantiles,
+    make_indices,
+    bfmi,
+    r2_score,
+)
 from ..theanof import floatX_array
 import pymc3.stats as pmstats
 from numpy.random import random, normal
@@ -18,15 +27,15 @@
 
 def test_log_post_trace():
     with pm.Model() as model:
-        pm.Normal('y')
+        pm.Normal("y")
         trace = pm.sample(10, tune=10, chains=1)
 
     logp = pmstats._log_post_trace(trace, model)
     assert logp.shape == (len(trace), 0)
 
     with pm.Model() as model:
-        pm.Normal('a')
-        pm.Normal('y', observed=np.zeros((2, 3)))
+        pm.Normal("a")
+        pm.Normal("y", observed=np.zeros((2, 3)))
         trace = pm.sample(10, tune=10, chains=1)
 
     logp = pmstats._log_post_trace(trace, model)
@@ -34,14 +43,14 @@ def test_log_post_trace():
     npt.assert_allclose(logp, -0.5 * np.log(2 * np.pi), atol=1e-7)
 
     with pm.Model() as model:
-        pm.Normal('a')
-        pm.Normal('y', observed=np.zeros((2, 3)))
+        pm.Normal("a")
+        pm.Normal("y", observed=np.zeros((2, 3)))
         data = pd.DataFrame(np.zeros((3, 4)))
         data.values[1, 1] = np.nan
-        pm.Normal('y2', observed=data)
+        pm.Normal("y2", observed=data)
         data = data.copy()
         data.values[:] = np.nan
-        pm.Normal('y3', observed=data)
+        pm.Normal("y3", observed=data)
         trace = pm.sample(10, tune=10, chains=1)
 
     logp = pmstats._log_post_trace(trace, model)
@@ -54,18 +63,18 @@ def test_compare():
     x_obs = np.random.normal(0, 1, size=100)
 
     with pm.Model() as model0:
-        mu = pm.Normal('mu', 0, 1)
-        x = pm.Normal('x', mu=mu, sd=1, observed=x_obs)
+        mu = pm.Normal("mu", 0, 1)
+        x = pm.Normal("x", mu=mu, sd=1, observed=x_obs)
         trace0 = pm.sample(1000)
 
     with pm.Model() as model1:
-        mu = pm.Normal('mu', 0, 1)
-        x = pm.Normal('x', mu=mu, sd=0.8, observed=x_obs)
+        mu = pm.Normal("mu", 0, 1)
+        x = pm.Normal("x", mu=mu, sd=0.8, observed=x_obs)
         trace1 = pm.sample(1000)
 
     with pm.Model() as model2:
-        mu = pm.Normal('mu', 0, 1)
-        x = pm.StudentT('x', nu=1, mu=mu, lam=1, observed=x_obs)
+        mu = pm.Normal("mu", 0, 1)
+        x = pm.StudentT("x", nu=1, mu=mu, lam=1, observed=x_obs)
         trace2 = pm.sample(1000)
 
     traces = [trace0, copy.copy(trace0)]
@@ -73,34 +82,34 @@ def test_compare():
 
     model_dict = dict(zip(models, traces))
 
-    w_st = pm.compare(model_dict, method='stacking')['weight']
-    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
-    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']
+    w_st = pm.compare(model_dict, method="stacking")["weight"]
+    w_bb_bma = pm.compare(model_dict, method="BB-pseudo-BMA")["weight"]
+    w_bma = pm.compare(model_dict, method="pseudo-BMA")["weight"]
 
     assert_almost_equal(w_st[0], w_st[1])
     assert_almost_equal(w_bb_bma[0], w_bb_bma[1])
     assert_almost_equal(w_bma[0], w_bma[1])
 
-    assert_almost_equal(np.sum(w_st), 1.)
-    assert_almost_equal(np.sum(w_bb_bma), 1.)
-    assert_almost_equal(np.sum(w_bma), 1.)
+    assert_almost_equal(np.sum(w_st), 1.0)
+    assert_almost_equal(np.sum(w_bb_bma), 1.0)
+    assert_almost_equal(np.sum(w_bma), 1.0)
 
     traces = [trace0, trace1, trace2]
     models = [model0, model1, model2]
 
     model_dict = dict(zip(models, traces))
-    
-    w_st = pm.compare(model_dict, method='stacking')['weight']
-    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
-    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']
 
-    assert(w_st[0] > w_st[1] > w_st[2])
-    assert(w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2])
-    assert(w_bma[0] > w_bma[1] > w_bma[2])
+    w_st = pm.compare(model_dict, method="stacking")["weight"]
+    w_bb_bma = pm.compare(model_dict, method="BB-pseudo-BMA")["weight"]
+    w_bma = pm.compare(model_dict, method="pseudo-BMA")["weight"]
 
-    assert_almost_equal(np.sum(w_st), 1.)
-    assert_almost_equal(np.sum(w_st), 1.)
-    assert_almost_equal(np.sum(w_st), 1.)
+    assert w_st[0] > w_st[1] > w_st[2]
+    assert w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2]
+    assert w_bma[0] > w_bma[1] > w_bma[2]
+
+    assert_almost_equal(np.sum(w_st), 1.0)  # each method's weights must sum to 1
+    assert_almost_equal(np.sum(w_bb_bma), 1.0)
+    assert_almost_equal(np.sum(w_bma), 1.0)
 
 
 class TestStats(SeededTest):
@@ -112,12 +121,15 @@ def setup_class(cls):
     def test_autocorr(self):
         """Test autocorrelation and autocovariance functions"""
         assert_almost_equal(autocorr(self.normal_sample)[1], 0, 2)
-        y = [(self.normal_sample[i - 1] + self.normal_sample[i]) /
-             2 for i in range(1, len(self.normal_sample))]
+        y = [
+            (self.normal_sample[i - 1] + self.normal_sample[i]) / 2
+            for i in range(1, len(self.normal_sample))
+        ]
         assert_almost_equal(autocorr(np.asarray(y))[1], 0.5, 2)
         lag = 5
-        acov_np = np.cov(self.normal_sample[:-lag],
-                         self.normal_sample[lag:], bias=1)[0, 1]
+        acov_np = np.cov(self.normal_sample[:-lag], self.normal_sample[lag:], bias=1)[
+            0, 1
+        ]
         acov_pm = autocov(self.normal_sample)[lag]
         assert_almost_equal(acov_pm, acov_np, 7)
 
@@ -126,18 +138,18 @@ def test_waic(self):
         x_obs = np.arange(6)
 
         with pm.Model():
-            p = pm.Beta('p', 1., 1., transform=None)
-            pm.Binomial('x', 5, p, observed=x_obs)
+            p = pm.Beta("p", 1.0, 1.0, transform=None)
+            pm.Binomial("x", 5, p, observed=x_obs)
 
             step = pm.Metropolis()
             trace = pm.sample(100, step)
             calculated_waic = pm.waic(trace)
 
-        log_py = st.binom.logpmf(np.atleast_2d(x_obs).T, 5, trace['p']).T
+        log_py = st.binom.logpmf(np.atleast_2d(x_obs).T, 5, trace["p"]).T
 
         lppd_i = np.log(np.mean(np.exp(log_py), axis=0))
         vars_lpd = np.var(log_py, axis=0)
-        waic_i = - 2 * (lppd_i - vars_lpd)
+        waic_i = -2 * (lppd_i - vars_lpd)
 
         actual_waic_se = np.sqrt(len(waic_i) * np.var(waic_i))
         actual_waic = np.sum(waic_i)
@@ -157,7 +169,7 @@ def test_make_indices(self):
 
     def test_mc_error(self):
         """Test batch standard deviation function"""
-        assert(mc_error(random(100000) < 0.0025))
+        assert mc_error(random(100000)) < 0.0025  # bound the error itself, not a boolean mask
 
     def test_quantiles(self):
         """Test quantiles function"""
@@ -170,8 +182,8 @@ def test_summary_0d_variable_model(self):
         mu = -2.1
         tau = 1.3
         with Model() as model:
-            Normal('x', mu, tau, testval=floatX_array(.1))
-            step = Metropolis(model.vars, np.diag([1.]), blocked=True)
+            Normal("x", mu, tau, testval=floatX_array(0.1))
+            step = Metropolis(model.vars, np.diag([1.0]), blocked=True)
             trace = pm.sample(100, step=step)
         summary(trace)
 
@@ -179,8 +191,8 @@ def test_summary_1d_variable_model(self):
         mu = -2.1
         tau = 1.3
         with Model() as model:
-            Normal('x', mu, tau, shape=2, testval=floatX_array([.1, .1]))
-            step = Metropolis(model.vars, np.diag([1.]), blocked=True)
+            Normal("x", mu, tau, shape=2, testval=floatX_array([0.1, 0.1]))
+            step = Metropolis(model.vars, np.diag([1.0]), blocked=True)
             trace = pm.sample(100, step=step)
         summary(trace)
 
@@ -188,9 +200,10 @@ def test_summary_2d_variable_model(self):
         mu = -2.1
         tau = 1.3
         with Model() as model:
-            Normal('x', mu, tau, shape=(2, 2),
-                   testval=floatX_array(np.tile(.1, (2, 2))))
-            step = Metropolis(model.vars, np.diag([1.]), blocked=True)
+            Normal(
+                "x", mu, tau, shape=(2, 2), testval=floatX_array(np.tile(0.1, (2, 2)))
+            )
+            step = Metropolis(model.vars, np.diag([1.0]), blocked=True)
             trace = pm.sample(100, step=step)
         summary(trace)
 
@@ -234,15 +247,14 @@ def test_groupby_leading_idxs_2d_variable(self):
     def test_groupby_leading_idxs_3d_variable(self):
         result = {k: list(v) for k, v in pm.stats._groupby_leading_idxs((2, 3, 2))}
 
-        expected_keys = [(0, 0), (0, 1), (0, 2),
-                         (1, 0), (1, 1), (1, 2)]
+        expected_keys = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
         keys = list(result.keys())
         assert len(keys) == len(expected_keys)
         for key in keys:
             assert result[key] == [key + (0,), key + (1,)]
 
     def test_bfmi(self):
-        trace = {'energy': np.array([1, 2, 3, 4])}
+        trace = {"energy": np.array([1, 2, 3, 4])}
 
         assert_almost_equal(bfmi(trace), 0.8)
 
@@ -250,47 +262,61 @@ def test_r2_score(self):
         x = np.linspace(0, 1, 100)
         y = np.random.normal(x, 1)
         res = st.linregress(x, y)
-        assert_almost_equal(res.rvalue ** 2,
-                            r2_score(y, res.intercept +
-                                     res.slope * x).r2_median,
-                            2)
+        assert_almost_equal(
+            res.rvalue ** 2, r2_score(y, res.intercept + res.slope * x).r2_median, 2
+        )
+
 
 class TestDfSummary(bf.ModelBackendSampledTestCase):
     backend = ndarray.NDArray
-    name = 'text-db'
+    name = "text-db"
     shape = (2, 3)
 
     def test_column_names(self):
         ds = summary(self.mtrace, batches=3)
-        npt.assert_equal(np.array(['mean', 'sd', 'mc_error',
-                                   'hpd_2.5', 'hpd_97.5',
-                                   'n_eff', 'Rhat']),
-                         ds.columns)
+        npt.assert_equal(
+            np.array(
+                ["mean", "sd", "mc_error", "hpd_2.5", "hpd_97.5", "n_eff", "Rhat"]
+            ),
+            ds.columns,
+        )
 
     def test_column_names_decimal_hpd(self):
         ds = summary(self.mtrace, batches=3, alpha=0.001)
-        npt.assert_equal(np.array(['mean', 'sd', 'mc_error',
-                                   'hpd_0.05', 'hpd_99.95',
-                                   'n_eff', 'Rhat']),
-                         ds.columns)
+        npt.assert_equal(
+            np.array(
+                ["mean", "sd", "mc_error", "hpd_0.05", "hpd_99.95", "n_eff", "Rhat"]
+            ),
+            ds.columns,
+        )
 
     def test_column_names_custom_function(self):
         def customf(x):
-            return pd.Series(np.mean(x, 0), name='my_mean')
+            return pd.Series(np.mean(x, 0), name="my_mean")
 
         ds = summary(self.mtrace, batches=3, stat_funcs=[customf])
-        npt.assert_equal(np.array(['my_mean']), ds.columns)
+        npt.assert_equal(np.array(["my_mean"]), ds.columns)
 
     def test_column_names_custom_function_extend(self):
         def customf(x):
-            return pd.Series(np.mean(x, 0), name='my_mean')
-
-        ds = summary(self.mtrace, batches=3,
-                        stat_funcs=[customf], extend=True)
-        npt.assert_equal(np.array(['mean', 'sd', 'mc_error',
-                                   'hpd_2.5', 'hpd_97.5', 'my_mean',
-                                   'n_eff', 'Rhat']),
-                         ds.columns)
+            return pd.Series(np.mean(x, 0), name="my_mean")
+
+        ds = summary(self.mtrace, batches=3, stat_funcs=[customf], extend=True)
+        npt.assert_equal(
+            np.array(
+                [
+                    "mean",
+                    "sd",
+                    "mc_error",
+                    "hpd_2.5",
+                    "hpd_97.5",
+                    "my_mean",
+                    "n_eff",
+                    "Rhat",
+                ]
+            ),
+            ds.columns,
+        )
 
     def test_value_alignment(self):
         mtrace = self.mtrace
@@ -299,47 +325,52 @@ def test_value_alignment(self):
             result = mtrace[var].mean(0)
             for idx, val in np.ndenumerate(result):
                 if idx:
-                    vidx = var + '__' + '_'.join([str(i) for i in idx])
+                    vidx = var + "__" + "_".join([str(i) for i in idx])
                 else:
                     vidx = var
-                npt.assert_equal(val, ds.loc[vidx, 'mean'])
+                npt.assert_equal(val, ds.loc[vidx, "mean"])
 
     def test_row_names(self):
         with Model():
-            pm.Uniform('x', 0, 1)
+            pm.Uniform("x", 0, 1)
             step = Metropolis()
             trace = pm.sample(100, step=step)
         ds = summary(trace, batches=3, include_transformed=True)
-        npt.assert_equal(np.array(['x_interval__', 'x']),
-                         ds.index)
+        npt.assert_equal(np.array(["x_interval__", "x"]), ds.index)
 
     def test_value_n_eff_rhat(self):
         mu = -2.1
         tau = 1.3
         with Model():
-            Normal('x0', mu, tau, testval=floatX_array(.1)) # 0d
-            Normal('x1', mu, tau, shape=2, testval=floatX_array([.1, .1]))# 1d
-            Normal('x2', mu, tau, shape=(2, 2),
-                   testval=floatX_array(np.tile(.1, (2, 2))))# 2d
-            Normal('x3', mu, tau, shape=(2, 2, 3),
-                   testval=floatX_array(np.tile(.1, (2, 2, 3))))# 3d
+            Normal("x0", mu, tau, testval=floatX_array(0.1))  # 0d
+            Normal("x1", mu, tau, shape=2, testval=floatX_array([0.1, 0.1]))  # 1d
+            Normal(
+                "x2", mu, tau, shape=(2, 2), testval=floatX_array(np.tile(0.1, (2, 2)))
+            )  # 2d
+            Normal(
+                "x3",
+                mu,
+                tau,
+                shape=(2, 2, 3),
+                testval=floatX_array(np.tile(0.1, (2, 2, 3))),
+            )  # 3d
             trace = pm.sample(100, step=pm.Metropolis())
         for varname in trace.varnames:
             # test effective_n value
             n_eff = pm.effective_n(trace, varnames=[varname])[varname]
             n_eff_df = np.asarray(
-                    pm.summary(trace, varnames=[varname])['n_eff']
-                                 ).reshape(n_eff.shape)
+                pm.summary(trace, varnames=[varname])["n_eff"]
+            ).reshape(n_eff.shape)
             npt.assert_equal(n_eff, n_eff_df)
-            
+
             # test Rhat value
             rhat = pm.gelman_rubin(trace, varnames=[varname])[varname]
-            rhat_df = np.asarray(
-                    pm.summary(trace, varnames=[varname])['Rhat']
-                                 ).reshape(rhat.shape)
+            rhat_df = np.asarray(pm.summary(trace, varnames=[varname])["Rhat"]).reshape(
+                rhat.shape
+            )
             npt.assert_equal(rhat, rhat_df)
 
     def test_psis(self):
         lw = np.random.randn(20000, 10)
-        _, ks = pm.stats._psislw(lw, 1.)
-        npt.assert_array_less(ks, .5)
+        _, ks = pm.stats._psislw(lw, 1.0)
+        npt.assert_array_less(ks, 0.5)
diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py
index ca67369c5b..a6f591bfeb 100644
--- a/pymc3/tests/test_step.py
+++ b/pymc3/tests/test_step.py
@@ -2,17 +2,38 @@
 import tempfile
 
 from .checks import close_to
-from .models import (simple_categorical, mv_simple, mv_simple_discrete,
-                     mv_prior_simple, simple_2model_continuous)
+from .models import (
+    simple_categorical,
+    mv_simple,
+    mv_simple_discrete,
+    mv_prior_simple,
+    simple_2model_continuous,
+)
 from pymc3.sampling import assign_step_methods, sample
 from pymc3.model import Model
-from pymc3.step_methods import (NUTS, BinaryGibbsMetropolis, CategoricalGibbsMetropolis,
-                                Metropolis, Slice, CompoundStep, NormalProposal,
-                                MultivariateNormalProposal, HamiltonianMC,
-                                EllipticalSlice, SMC, DEMetropolis)
+from pymc3.step_methods import (
+    NUTS,
+    BinaryGibbsMetropolis,
+    CategoricalGibbsMetropolis,
+    Metropolis,
+    Slice,
+    CompoundStep,
+    NormalProposal,
+    MultivariateNormalProposal,
+    HamiltonianMC,
+    EllipticalSlice,
+    SMC,
+    DEMetropolis,
+)
 from pymc3.theanof import floatX
 from pymc3.distributions import (
-    Binomial, Normal, Bernoulli, Categorical, Beta, HalfNormal)
+    Binomial,
+    Normal,
+    Bernoulli,
+    Categorical,
+    Beta,
+    HalfNormal,
+)
 
 from numpy.testing import assert_array_almost_equal
 import numpy as np
@@ -25,136 +46,626 @@
 
 class TestStepMethods(object):  # yield test doesn't work subclassing object
     master_samples = {
-        Slice: np.array([ 0.10233528,  0.40458486,  0.17329217,  0.46281232,  0.22556278,
-        1.52632836, -0.27823807,  0.02539625,  1.02711735,  0.03686346,
-       -0.62841281, -0.27125083,  0.31989505,  0.84031155, -0.18949138,
-        1.60550262,  1.01375291, -0.29742941,  0.35312738,  0.43363622,
-        1.18898078,  0.80063888,  0.38445644,  0.90184395,  1.69150017,
-        2.05452171, -0.13334755,  1.61265408,  1.36579345,  1.3216292 ,
-       -0.59487037, -0.34648927,  1.05107285,  0.42870305,  0.61552257,
-        0.55239884,  0.13929271,  0.26213809, -0.2316028 ,  0.19711046,
-        1.42832629,  1.93641434, -0.81142379, -0.31059485, -0.3189694 ,
-        1.43542534,  0.40311093,  1.63103768,  0.24034874,  0.33924866,
-        0.94951616,  0.71700185,  0.79273056, -0.44569146,  1.91974783,
-        0.84673795,  1.12411833, -0.83123811, -0.54310095, -0.00721347,
-        0.9925055 ,  1.04015058, -0.34958074, -0.14926302, -0.47990225,
-       -0.75629446, -0.95942067,  1.68179204,  1.20598073,  1.39675733,
-        1.22755935,  0.06728757,  1.05184231,  1.01126791, -0.67327093,
-        0.21429651,  1.33730461, -1.56174184, -0.64348764,  0.98050636,
-        0.25923049,  0.58622631,  0.46589069,  1.44367347, -0.43141573,
-        1.08293374, -0.5563204 ,  1.46287904,  1.26019815,  0.52972104,
-        1.08792687,  1.10064358,  1.84881549,  0.91179647,  0.69316592,
-       -0.47657064,  2.22747063,  0.83388935,  0.84680716, -0.10556406]),
-        HamiltonianMC: np.array([ 0.43733634,  0.43733634,  0.15955614, -0.44355329,  0.21465731,
-        0.30148244,  0.45527282,  0.45527282,  0.41753005, -0.03480236,
-        1.16599611,  0.565306  ,  0.565306  ,  0.0077143 , -0.18291321,
-       -0.14577946, -0.00703353, -0.00703353,  0.14345194, -0.12345058,
-        0.76875516,  0.76875516,  0.84289506,  0.24596225,  0.95287087,
-        1.3799335 ,  1.1493899 ,  1.1493899 ,  2.0255982 , -0.77850273,
-        0.11604115,  0.11604115,  0.39296557,  0.34826491,  0.5951183 ,
-        0.63097341,  0.57938784,  0.57938784,  0.76570029,  0.63516046,
-        0.23667784,  2.0151377 ,  1.92064966,  1.09125654, -0.43716787,
-        0.61939595,  0.30566853,  0.30566853,  0.3690641 ,  0.3690641 ,
-        0.3690641 ,  1.26497542,  0.90890334,  0.01482818,  0.01482818,
-       -0.15542473,  0.26475651,  0.32687263,  1.21902207,  0.6708017 ,
-       -0.18867695, -0.18867695, -0.07141329, -0.04631175, -0.16855462,
-       -0.16855462,  1.05455573,  0.47371825,  0.47371825,  0.86307077,
-        0.86307077,  0.51484125,  1.0022533 ,  1.0022533 ,  1.02370316,
-        0.71331829,  0.71331829,  0.71331829,  0.40758664,  0.81307434,
-       -0.46269741, -0.60284666,  0.06710527,  0.06710527, -0.35055053,
-        0.36727629,  0.36727629,  0.69350367,  0.11268647,  0.37681301,
-        1.10168386,  0.49559472,  0.49559472,  0.06193658, -0.07947103,
-        0.01969434,  1.28470893, -0.13536813, -0.13536813,  0.6575966 ]),
-        Metropolis: np.array([ 1.62434536,  1.01258895,  0.4844172 ,  0.4844172 ,  0.4844172 ,
-        0.4844172 ,  0.4844172 ,  0.4844172 ,  0.4844172 ,  0.4844172 ,
-        0.31198899,  0.31198899,  0.31198899,  0.31198899,  1.21284494,
-        0.52911708,  0.261229  ,  0.79158447,  0.10441177, -0.74079387,
-       -0.74079387, -0.50637818, -0.50637818, -0.50637818, -0.45557042,
-       -0.45557042, -0.33541147,  0.28179164,  0.58196196,  0.22971211,
-        0.02081788,  0.60744107,  0.8930284 ,  0.8930284 ,  1.40595822,
-        1.10786538,  1.10786538,  1.10786538,  1.10786538, -0.28863095,
-       -0.12859388,  0.74757504,  0.74757504,  0.74757504,  0.97766977,
-        0.97766977,  0.75534163,  0.55458356,  0.75288328,  0.87189193,
-        0.9937132 ,  0.9937132 ,  0.61842825,  0.61842825,  0.27457457,
-        0.31817143,  0.31817143,  0.31817143, -0.77674042, -0.60735798,
-        0.13319847, -0.82050213, -0.82050213, -0.50534274, -0.15479676,
-       -0.15479676, -0.19349227, -0.19349227, -0.21810923, -0.21810923,
-       -0.21810923,  1.0180548 , -0.18121323,  0.68213209,  0.68213209,
-        1.23266958,  1.23266958,  0.60913885,  1.41099989,  1.45756718,
-        1.45756718,  1.45756718,  1.45756718,  1.59526839,  1.82776295,
-        1.82776295,  1.82776295,  1.82776295,  2.2691274 ,  2.16897216,
-        2.18638157,  1.06436284,  0.54726838,  0.54726838,  1.04247971,
-        0.86777655,  0.86777655,  0.86777655,  0.86777655,  0.61914177]),
-        NUTS: np.array([ 0.550575  ,  0.550575  ,  0.80046332,  0.91590059,  1.34621916,
-        1.34621916, -0.63917773, -0.65770809, -0.65770809, -0.64512868,
-       -1.05448153, -0.5225666 ,  0.14335153, -0.0034499 , -0.0034499 ,
-        0.05309212, -0.53186371,  0.29325825,  0.43210854,  0.56284837,
-        0.56284837,  0.38041767,  0.47322034,  0.49937368,  0.49937368,
-        0.44424258,  0.44424258, -0.02790848, -0.40470145, -0.35725567,
-       -0.43744228,  0.41955432,  0.31099421,  0.31099421,  0.65811717,
-        0.66649398,  0.38493786,  0.54114658,  0.54114658,  0.68222408,
-        0.66404942,  1.44143108,  1.15638799, -0.06775775, -0.06775775,
-        0.30418561,  0.23543403,  0.57934404, -0.5435111 , -0.47938915,
-       -0.23816662,  0.36793792,  0.36793792,  0.64980016,  0.52150456,
-        0.64643321,  0.26130179,  1.10569077,  1.10569077,  1.23662797,
-       -0.36928735, -0.14303069,  0.85298904,  0.85298904,  0.31422085,
-        0.32113762,  0.32113762,  1.0692238 ,  1.0692238 ,  1.60127576,
-        1.49249738,  1.09065107,  0.84264371,  0.84264371, -0.08832343,
-        0.04868027, -0.02679449, -0.02679449,  0.91989101,  0.65754478,
-       -0.39220625,  0.08379492,  1.03055634,  1.03055634,  1.71071332,
-        1.58740483,  1.67905741,  0.77744868,  0.15050587,  0.15050587,
-        0.73979127,  0.15445515,  0.13134717,  0.85068974,  0.85068974,
-        0.6974799 ,  0.16170472,  0.86405959,  0.86405959, -0.22032854]),
-       SMC: np.array([ 5.10950205e-02,  1.09811720e+00,  1.78330202e-01,  6.85938766e-01,
-        1.42354476e-01, -1.59630758e+00,  1.57176810e+00, -4.01398917e-01,
-        1.14567871e+00,  1.14954938e+00,  4.94399840e-01,  1.16253017e+00,
-        1.17432244e+00,  7.79195162e-01,  1.29017945e+00,  2.53722905e-01,
-        5.38589898e-01,  3.52121216e-01,  1.35795966e+00,  1.02086933e-01,
-        1.58845251e+00,  6.76852927e-01, -1.04716592e-02, -1.01613324e-01,
-        1.37680965e+00,  7.40036542e-01,  2.89069320e-01,  1.48153741e+00,
-        9.58156958e-01,  5.73623782e-02,  7.68850721e-01,  3.68643390e-01,
-        1.47645964e+00,  2.32596780e-01, -1.85008158e-01,  3.71335958e-01,
-        2.68600102e+00, -4.89504443e-01,  6.54265561e-02,  3.80455349e-01,
-        1.17875338e+00,  2.30233324e-01,  6.90960231e-01,  8.81668685e-01,
-       -2.19754340e-01,  1.27686862e-01,  3.28444250e-01,  1.34820635e-01,
-        5.29725257e-01,  1.43783915e+00, -1.64754264e-01,  7.41446719e-01,
-       -1.17733186e+00,  6.01215658e-02,  1.82638158e-01, -2.23232214e-02,
-       -1.79877583e-02,  8.37949150e-01,  4.41964955e-01, -8.66524743e-01,
-        4.90738093e-01,  2.42056488e-01,  4.67699626e-01,  2.91075351e-01,
-        1.49541153e+00,  8.30730845e-01,  1.03956404e+00, -5.16162910e-01,
-        2.84338859e-01,  1.72305888e+00,  9.52445566e-01,  1.48831718e+00,
-        8.03455325e-01,  1.48840970e+00,  6.98122664e-01,  3.30187139e-01,
-        7.88029712e-01,  9.31510828e-01,  1.01326878e+00,  2.26637755e-01,
-        1.70703646e-01, -8.54429841e-01,  2.97254590e-01, -2.77843274e-01,
-       -2.25544207e-01,  1.98862826e-02,  5.05953885e-01,  4.98203941e-01,
-        1.20897382e+00, -6.32958669e-05, -7.22425896e-01,  1.60930869e+00,
-       -5.02773645e-01,  2.46405678e+00,  9.16039706e-01,  1.14146060e+00,
-       -1.95781984e-01, -2.44653942e-01,  2.67851290e-01,  2.37462012e-01,
-        6.71471950e-01,  1.18319765e+00,  1.29146530e+00, -3.14177753e-01,
-       -1.31041215e-02,  1.05029405e+00,  1.31202399e+00,  7.40532839e-02,
-        9.15510041e-01,  7.71054604e-01,  9.83483263e-01,  9.03032142e-01,
-        9.14191160e-01,  9.32285366e-01,  1.13937607e+00, -4.29155928e-01,
-        3.44609229e-02, -5.46423555e-02,  1.34625982e+00, -1.28287047e-01,
-       -1.55214879e-02,  3.25294234e-01,  1.06120585e+00, -5.09891282e-01,
-        1.25789335e+00,  1.01808348e+00, -9.92590713e-01,  1.72832932e+00,
-        1.12232980e+00,  8.54801892e-01,  1.41534752e+00,  3.50798405e-01,
-        3.69381623e-01,  1.48608411e+00, -1.15506310e-02,  1.57066360e+00,
-        2.00747378e-01,  4.47219763e-01,  5.57720524e-01, -7.74295353e-02,
-        1.79192501e+00,  7.66510475e-01,  1.38852488e+00, -4.06055122e-01,
-        2.73203156e-01,  3.61014687e-01,  1.23574043e+00,  1.64565746e-01,
-       -9.89896480e-02,  9.26130265e-02,  1.06440134e+00, -1.55890408e-01,
-        4.47131846e-01, -7.59186008e-01, -1.50881256e+00, -2.13928005e-01,
-       -4.19160151e-01,  1.75815544e+00,  7.45423008e-01,  6.94781506e-01,
-        1.58596346e+00,  1.75508724e+00,  4.56070434e-01,  2.94128709e-02,
-        1.17703970e+00, -9.90230827e-02,  8.42796845e-01,  1.79154944e+00,
-        5.92779197e-01,  2.73562285e-01,  1.61597907e+00,  1.23514403e+00,
-        4.86261080e-01, -3.10434934e-01,  5.57873722e-01,  6.50365217e-01,
-       -3.41009850e-01,  9.26851109e-01,  8.28936486e-01,  9.16180689e-02,
-        1.30226405e+00,  3.73945789e-01,  6.04560122e-02,  6.00698708e-01,
-        9.68764731e-02,  1.41904148e+00,  6.94182961e-03,  3.17504138e-01,
-        5.90956041e-01, -5.78113887e-01,  5.26615565e-01, -4.19715252e-01,
-        8.92891364e-01,  1.30207363e-01,  4.19899637e-01,  7.10275704e-01,
-        9.27418179e-02,  1.85758044e+00,  4.76988907e-01, -1.36341398e-01]),
+        Slice: np.array(
+            [
+                0.10233528,
+                0.40458486,
+                0.17329217,
+                0.46281232,
+                0.22556278,
+                1.52632836,
+                -0.27823807,
+                0.02539625,
+                1.02711735,
+                0.03686346,
+                -0.62841281,
+                -0.27125083,
+                0.31989505,
+                0.84031155,
+                -0.18949138,
+                1.60550262,
+                1.01375291,
+                -0.29742941,
+                0.35312738,
+                0.43363622,
+                1.18898078,
+                0.80063888,
+                0.38445644,
+                0.90184395,
+                1.69150017,
+                2.05452171,
+                -0.13334755,
+                1.61265408,
+                1.36579345,
+                1.3216292,
+                -0.59487037,
+                -0.34648927,
+                1.05107285,
+                0.42870305,
+                0.61552257,
+                0.55239884,
+                0.13929271,
+                0.26213809,
+                -0.2316028,
+                0.19711046,
+                1.42832629,
+                1.93641434,
+                -0.81142379,
+                -0.31059485,
+                -0.3189694,
+                1.43542534,
+                0.40311093,
+                1.63103768,
+                0.24034874,
+                0.33924866,
+                0.94951616,
+                0.71700185,
+                0.79273056,
+                -0.44569146,
+                1.91974783,
+                0.84673795,
+                1.12411833,
+                -0.83123811,
+                -0.54310095,
+                -0.00721347,
+                0.9925055,
+                1.04015058,
+                -0.34958074,
+                -0.14926302,
+                -0.47990225,
+                -0.75629446,
+                -0.95942067,
+                1.68179204,
+                1.20598073,
+                1.39675733,
+                1.22755935,
+                0.06728757,
+                1.05184231,
+                1.01126791,
+                -0.67327093,
+                0.21429651,
+                1.33730461,
+                -1.56174184,
+                -0.64348764,
+                0.98050636,
+                0.25923049,
+                0.58622631,
+                0.46589069,
+                1.44367347,
+                -0.43141573,
+                1.08293374,
+                -0.5563204,
+                1.46287904,
+                1.26019815,
+                0.52972104,
+                1.08792687,
+                1.10064358,
+                1.84881549,
+                0.91179647,
+                0.69316592,
+                -0.47657064,
+                2.22747063,
+                0.83388935,
+                0.84680716,
+                -0.10556406,
+            ]
+        ),
+        HamiltonianMC: np.array(
+            [
+                0.43733634,
+                0.43733634,
+                0.15955614,
+                -0.44355329,
+                0.21465731,
+                0.30148244,
+                0.45527282,
+                0.45527282,
+                0.41753005,
+                -0.03480236,
+                1.16599611,
+                0.565306,
+                0.565306,
+                0.0077143,
+                -0.18291321,
+                -0.14577946,
+                -0.00703353,
+                -0.00703353,
+                0.14345194,
+                -0.12345058,
+                0.76875516,
+                0.76875516,
+                0.84289506,
+                0.24596225,
+                0.95287087,
+                1.3799335,
+                1.1493899,
+                1.1493899,
+                2.0255982,
+                -0.77850273,
+                0.11604115,
+                0.11604115,
+                0.39296557,
+                0.34826491,
+                0.5951183,
+                0.63097341,
+                0.57938784,
+                0.57938784,
+                0.76570029,
+                0.63516046,
+                0.23667784,
+                2.0151377,
+                1.92064966,
+                1.09125654,
+                -0.43716787,
+                0.61939595,
+                0.30566853,
+                0.30566853,
+                0.3690641,
+                0.3690641,
+                0.3690641,
+                1.26497542,
+                0.90890334,
+                0.01482818,
+                0.01482818,
+                -0.15542473,
+                0.26475651,
+                0.32687263,
+                1.21902207,
+                0.6708017,
+                -0.18867695,
+                -0.18867695,
+                -0.07141329,
+                -0.04631175,
+                -0.16855462,
+                -0.16855462,
+                1.05455573,
+                0.47371825,
+                0.47371825,
+                0.86307077,
+                0.86307077,
+                0.51484125,
+                1.0022533,
+                1.0022533,
+                1.02370316,
+                0.71331829,
+                0.71331829,
+                0.71331829,
+                0.40758664,
+                0.81307434,
+                -0.46269741,
+                -0.60284666,
+                0.06710527,
+                0.06710527,
+                -0.35055053,
+                0.36727629,
+                0.36727629,
+                0.69350367,
+                0.11268647,
+                0.37681301,
+                1.10168386,
+                0.49559472,
+                0.49559472,
+                0.06193658,
+                -0.07947103,
+                0.01969434,
+                1.28470893,
+                -0.13536813,
+                -0.13536813,
+                0.6575966,
+            ]
+        ),
+        Metropolis: np.array(
+            [
+                1.62434536,
+                1.01258895,
+                0.4844172,
+                0.4844172,
+                0.4844172,
+                0.4844172,
+                0.4844172,
+                0.4844172,
+                0.4844172,
+                0.4844172,
+                0.31198899,
+                0.31198899,
+                0.31198899,
+                0.31198899,
+                1.21284494,
+                0.52911708,
+                0.261229,
+                0.79158447,
+                0.10441177,
+                -0.74079387,
+                -0.74079387,
+                -0.50637818,
+                -0.50637818,
+                -0.50637818,
+                -0.45557042,
+                -0.45557042,
+                -0.33541147,
+                0.28179164,
+                0.58196196,
+                0.22971211,
+                0.02081788,
+                0.60744107,
+                0.8930284,
+                0.8930284,
+                1.40595822,
+                1.10786538,
+                1.10786538,
+                1.10786538,
+                1.10786538,
+                -0.28863095,
+                -0.12859388,
+                0.74757504,
+                0.74757504,
+                0.74757504,
+                0.97766977,
+                0.97766977,
+                0.75534163,
+                0.55458356,
+                0.75288328,
+                0.87189193,
+                0.9937132,
+                0.9937132,
+                0.61842825,
+                0.61842825,
+                0.27457457,
+                0.31817143,
+                0.31817143,
+                0.31817143,
+                -0.77674042,
+                -0.60735798,
+                0.13319847,
+                -0.82050213,
+                -0.82050213,
+                -0.50534274,
+                -0.15479676,
+                -0.15479676,
+                -0.19349227,
+                -0.19349227,
+                -0.21810923,
+                -0.21810923,
+                -0.21810923,
+                1.0180548,
+                -0.18121323,
+                0.68213209,
+                0.68213209,
+                1.23266958,
+                1.23266958,
+                0.60913885,
+                1.41099989,
+                1.45756718,
+                1.45756718,
+                1.45756718,
+                1.45756718,
+                1.59526839,
+                1.82776295,
+                1.82776295,
+                1.82776295,
+                1.82776295,
+                2.2691274,
+                2.16897216,
+                2.18638157,
+                1.06436284,
+                0.54726838,
+                0.54726838,
+                1.04247971,
+                0.86777655,
+                0.86777655,
+                0.86777655,
+                0.86777655,
+                0.61914177,
+            ]
+        ),
+        NUTS: np.array(
+            [
+                0.550575,
+                0.550575,
+                0.80046332,
+                0.91590059,
+                1.34621916,
+                1.34621916,
+                -0.63917773,
+                -0.65770809,
+                -0.65770809,
+                -0.64512868,
+                -1.05448153,
+                -0.5225666,
+                0.14335153,
+                -0.0034499,
+                -0.0034499,
+                0.05309212,
+                -0.53186371,
+                0.29325825,
+                0.43210854,
+                0.56284837,
+                0.56284837,
+                0.38041767,
+                0.47322034,
+                0.49937368,
+                0.49937368,
+                0.44424258,
+                0.44424258,
+                -0.02790848,
+                -0.40470145,
+                -0.35725567,
+                -0.43744228,
+                0.41955432,
+                0.31099421,
+                0.31099421,
+                0.65811717,
+                0.66649398,
+                0.38493786,
+                0.54114658,
+                0.54114658,
+                0.68222408,
+                0.66404942,
+                1.44143108,
+                1.15638799,
+                -0.06775775,
+                -0.06775775,
+                0.30418561,
+                0.23543403,
+                0.57934404,
+                -0.5435111,
+                -0.47938915,
+                -0.23816662,
+                0.36793792,
+                0.36793792,
+                0.64980016,
+                0.52150456,
+                0.64643321,
+                0.26130179,
+                1.10569077,
+                1.10569077,
+                1.23662797,
+                -0.36928735,
+                -0.14303069,
+                0.85298904,
+                0.85298904,
+                0.31422085,
+                0.32113762,
+                0.32113762,
+                1.0692238,
+                1.0692238,
+                1.60127576,
+                1.49249738,
+                1.09065107,
+                0.84264371,
+                0.84264371,
+                -0.08832343,
+                0.04868027,
+                -0.02679449,
+                -0.02679449,
+                0.91989101,
+                0.65754478,
+                -0.39220625,
+                0.08379492,
+                1.03055634,
+                1.03055634,
+                1.71071332,
+                1.58740483,
+                1.67905741,
+                0.77744868,
+                0.15050587,
+                0.15050587,
+                0.73979127,
+                0.15445515,
+                0.13134717,
+                0.85068974,
+                0.85068974,
+                0.6974799,
+                0.16170472,
+                0.86405959,
+                0.86405959,
+                -0.22032854,
+            ]
+        ),
+        SMC: np.array(
+            [
+                5.10950205e-02,
+                1.09811720e00,
+                1.78330202e-01,
+                6.85938766e-01,
+                1.42354476e-01,
+                -1.59630758e00,
+                1.57176810e00,
+                -4.01398917e-01,
+                1.14567871e00,
+                1.14954938e00,
+                4.94399840e-01,
+                1.16253017e00,
+                1.17432244e00,
+                7.79195162e-01,
+                1.29017945e00,
+                2.53722905e-01,
+                5.38589898e-01,
+                3.52121216e-01,
+                1.35795966e00,
+                1.02086933e-01,
+                1.58845251e00,
+                6.76852927e-01,
+                -1.04716592e-02,
+                -1.01613324e-01,
+                1.37680965e00,
+                7.40036542e-01,
+                2.89069320e-01,
+                1.48153741e00,
+                9.58156958e-01,
+                5.73623782e-02,
+                7.68850721e-01,
+                3.68643390e-01,
+                1.47645964e00,
+                2.32596780e-01,
+                -1.85008158e-01,
+                3.71335958e-01,
+                2.68600102e00,
+                -4.89504443e-01,
+                6.54265561e-02,
+                3.80455349e-01,
+                1.17875338e00,
+                2.30233324e-01,
+                6.90960231e-01,
+                8.81668685e-01,
+                -2.19754340e-01,
+                1.27686862e-01,
+                3.28444250e-01,
+                1.34820635e-01,
+                5.29725257e-01,
+                1.43783915e00,
+                -1.64754264e-01,
+                7.41446719e-01,
+                -1.17733186e00,
+                6.01215658e-02,
+                1.82638158e-01,
+                -2.23232214e-02,
+                -1.79877583e-02,
+                8.37949150e-01,
+                4.41964955e-01,
+                -8.66524743e-01,
+                4.90738093e-01,
+                2.42056488e-01,
+                4.67699626e-01,
+                2.91075351e-01,
+                1.49541153e00,
+                8.30730845e-01,
+                1.03956404e00,
+                -5.16162910e-01,
+                2.84338859e-01,
+                1.72305888e00,
+                9.52445566e-01,
+                1.48831718e00,
+                8.03455325e-01,
+                1.48840970e00,
+                6.98122664e-01,
+                3.30187139e-01,
+                7.88029712e-01,
+                9.31510828e-01,
+                1.01326878e00,
+                2.26637755e-01,
+                1.70703646e-01,
+                -8.54429841e-01,
+                2.97254590e-01,
+                -2.77843274e-01,
+                -2.25544207e-01,
+                1.98862826e-02,
+                5.05953885e-01,
+                4.98203941e-01,
+                1.20897382e00,
+                -6.32958669e-05,
+                -7.22425896e-01,
+                1.60930869e00,
+                -5.02773645e-01,
+                2.46405678e00,
+                9.16039706e-01,
+                1.14146060e00,
+                -1.95781984e-01,
+                -2.44653942e-01,
+                2.67851290e-01,
+                2.37462012e-01,
+                6.71471950e-01,
+                1.18319765e00,
+                1.29146530e00,
+                -3.14177753e-01,
+                -1.31041215e-02,
+                1.05029405e00,
+                1.31202399e00,
+                7.40532839e-02,
+                9.15510041e-01,
+                7.71054604e-01,
+                9.83483263e-01,
+                9.03032142e-01,
+                9.14191160e-01,
+                9.32285366e-01,
+                1.13937607e00,
+                -4.29155928e-01,
+                3.44609229e-02,
+                -5.46423555e-02,
+                1.34625982e00,
+                -1.28287047e-01,
+                -1.55214879e-02,
+                3.25294234e-01,
+                1.06120585e00,
+                -5.09891282e-01,
+                1.25789335e00,
+                1.01808348e00,
+                -9.92590713e-01,
+                1.72832932e00,
+                1.12232980e00,
+                8.54801892e-01,
+                1.41534752e00,
+                3.50798405e-01,
+                3.69381623e-01,
+                1.48608411e00,
+                -1.15506310e-02,
+                1.57066360e00,
+                2.00747378e-01,
+                4.47219763e-01,
+                5.57720524e-01,
+                -7.74295353e-02,
+                1.79192501e00,
+                7.66510475e-01,
+                1.38852488e00,
+                -4.06055122e-01,
+                2.73203156e-01,
+                3.61014687e-01,
+                1.23574043e00,
+                1.64565746e-01,
+                -9.89896480e-02,
+                9.26130265e-02,
+                1.06440134e00,
+                -1.55890408e-01,
+                4.47131846e-01,
+                -7.59186008e-01,
+                -1.50881256e00,
+                -2.13928005e-01,
+                -4.19160151e-01,
+                1.75815544e00,
+                7.45423008e-01,
+                6.94781506e-01,
+                1.58596346e00,
+                1.75508724e00,
+                4.56070434e-01,
+                2.94128709e-02,
+                1.17703970e00,
+                -9.90230827e-02,
+                8.42796845e-01,
+                1.79154944e00,
+                5.92779197e-01,
+                2.73562285e-01,
+                1.61597907e00,
+                1.23514403e00,
+                4.86261080e-01,
+                -3.10434934e-01,
+                5.57873722e-01,
+                6.50365217e-01,
+                -3.41009850e-01,
+                9.26851109e-01,
+                8.28936486e-01,
+                9.16180689e-02,
+                1.30226405e00,
+                3.73945789e-01,
+                6.04560122e-02,
+                6.00698708e-01,
+                9.68764731e-02,
+                1.41904148e00,
+                6.94182961e-03,
+                3.17504138e-01,
+                5.90956041e-01,
+                -5.78113887e-01,
+                5.26615565e-01,
+                -4.19715252e-01,
+                8.92891364e-01,
+                1.30207363e-01,
+                4.19899637e-01,
+                7.10275704e-01,
+                9.27418179e-02,
+                1.85758044e00,
+                4.76988907e-01,
+                -1.36341398e-01,
+            ]
+        ),
     }
 
     def setup_class(self):
@@ -163,7 +674,9 @@ def setup_class(self):
     def teardown_class(self):
         shutil.rmtree(self.temp_dir)
 
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_sample_exact(self):
         for step_method in self.master_samples:
             self.check_trace(step_method)
@@ -186,27 +699,37 @@ def check_trace(self, step_method):
         """
         n_steps = 100
         with Model() as model:
-            x = Normal('x', mu=0, sd=1)
-            y = Normal('y', mu=x, sd=1, observed=1)
-            if step_method.__name__ == 'SMC':
-                trace = sample(draws=200,
-                               random_seed=1,
-                               progressbar=False,
-                               step=step_method())
-            elif step_method.__name__ == 'NUTS':
+            x = Normal("x", mu=0, sd=1)
+            y = Normal("y", mu=x, sd=1, observed=1)
+            if step_method.__name__ == "SMC":
+                trace = sample(
+                    draws=200, random_seed=1, progressbar=False, step=step_method()
+                )
+            elif step_method.__name__ == "NUTS":
                 step = step_method(scaling=model.test_point)
-                trace = sample(0, tune=n_steps,
-                               discard_tuned_samples=False,
-                               step=step, random_seed=1, chains=1)
+                trace = sample(
+                    0,
+                    tune=n_steps,
+                    discard_tuned_samples=False,
+                    step=step,
+                    random_seed=1,
+                    chains=1,
+                )
             else:
-                trace = sample(0, tune=n_steps,
-                               discard_tuned_samples=False,
-                               step=step_method(), random_seed=1, chains=1)
+                trace = sample(
+                    0,
+                    tune=n_steps,
+                    discard_tuned_samples=False,
+                    step=step_method(),
+                    random_seed=1,
+                    chains=1,
+                )
 
         assert_array_almost_equal(
-            trace['x'],
+            trace["x"],
             self.master_samples[step_method],
-            decimal=select_by_precision(float64=6, float32=4))
+            decimal=select_by_precision(float64=6, float32=4),
+        )
 
     def check_stat(self, check, trace, name):
         for (var, stat, value, bound) in check:
@@ -215,9 +738,8 @@ def check_stat(self, check, trace, name):
 
     def test_step_continuous(self):
         start, model, (mu, C) = mv_simple()
-        unc = np.diag(C) ** .5
-        check = (('x', np.mean, mu, unc / 10.),
-                 ('x', np.std, unc, unc / 10.))
+        unc = np.diag(C) ** 0.5
+        check = (("x", np.mean, mu, unc / 10.0), ("x", np.std, unc, unc / 10.0))
         with model:
             steps = (
                 Slice(),
@@ -227,59 +749,77 @@ def test_step_continuous(self):
                 Slice(blocked=True),
                 HamiltonianMC(scaling=C, is_cov=True),
                 NUTS(scaling=C, is_cov=True),
-                CompoundStep([
-                    HamiltonianMC(scaling=C, is_cov=True),
-                    HamiltonianMC(scaling=C, is_cov=True, blocked=False)]),
+                CompoundStep(
+                    [
+                        HamiltonianMC(scaling=C, is_cov=True),
+                        HamiltonianMC(scaling=C, is_cov=True, blocked=False),
+                    ]
+                ),
             )
         for step in steps:
-            trace = sample(0, tune=8000, chains=1,
-                           discard_tuned_samples=False, step=step,
-                           start=start, model=model, random_seed=1)
+            trace = sample(
+                0,
+                tune=8000,
+                chains=1,
+                discard_tuned_samples=False,
+                step=step,
+                start=start,
+                model=model,
+                random_seed=1,
+            )
             self.check_stat(check, trace, step.__class__.__name__)
 
     def test_step_discrete(self):
         if theano.config.floatX == "float32":
             return  # Cannot use @skip because it only skips one iteration of the yield
         start, model, (mu, C) = mv_simple_discrete()
-        unc = np.diag(C) ** .5
-        check = (('x', np.mean, mu, unc / 10.),
-                 ('x', np.std, unc, unc / 10.))
+        unc = np.diag(C) ** 0.5
+        check = (("x", np.mean, mu, unc / 10.0), ("x", np.std, unc, unc / 10.0))
         with model:
-            steps = (
-                Metropolis(S=C, proposal_dist=MultivariateNormalProposal),
-            )
+            steps = (Metropolis(S=C, proposal_dist=MultivariateNormalProposal),)
         for step in steps:
-            trace = sample(20000, tune=0, step=step, start=start, model=model,
-                           random_seed=1, chains=1)
+            trace = sample(
+                20000,
+                tune=0,
+                step=step,
+                start=start,
+                model=model,
+                random_seed=1,
+                chains=1,
+            )
             self.check_stat(check, trace, step.__class__.__name__)
 
     def test_step_categorical(self):
         start, model, (mu, C) = simple_categorical()
-        unc = C ** .5
-        check = (('x', np.mean, mu, unc / 10.),
-                 ('x', np.std, unc, unc / 10.))
+        unc = C ** 0.5
+        check = (("x", np.mean, mu, unc / 10.0), ("x", np.std, unc, unc / 10.0))
         with model:
             steps = (
-                CategoricalGibbsMetropolis(model.x, proposal='uniform'),
-                CategoricalGibbsMetropolis(model.x, proposal='proportional'),
+                CategoricalGibbsMetropolis(model.x, proposal="uniform"),
+                CategoricalGibbsMetropolis(model.x, proposal="proportional"),
             )
         for step in steps:
-            trace = sample(8000, tune=0, step=step, start=start, model=model, random_seed=1)
+            trace = sample(
+                8000, tune=0, step=step, start=start, model=model, random_seed=1
+            )
             self.check_stat(check, trace, step.__class__.__name__)
 
     def test_step_elliptical_slice(self):
         start, model, (K, L, mu, std, noise) = mv_prior_simple()
         unc = noise ** 0.5
-        check = (('x', np.mean, mu, unc / 10.),
-                 ('x', np.std, std, unc / 10.))
+        check = (("x", np.mean, mu, unc / 10.0), ("x", np.std, std, unc / 10.0))
         with model:
-            steps = (
-                EllipticalSlice(prior_cov=K),
-                EllipticalSlice(prior_chol=L),
-            )
+            steps = (EllipticalSlice(prior_cov=K), EllipticalSlice(prior_chol=L))
         for step in steps:
-            trace = sample(5000, tune=0, step=step, start=start, model=model,
-                           random_seed=1, chains=1)
+            trace = sample(
+                5000,
+                tune=0,
+                step=step,
+                start=start,
+                model=model,
+                random_seed=1,
+                chains=1,
+            )
             self.check_stat(check, trace, step.__class__.__name__)
 
 
@@ -309,8 +849,10 @@ def test_mv_proposal(self):
 class TestCompoundStep(object):
     samplers = (Metropolis, Slice, HamiltonianMC, NUTS, DEMetropolis)
 
-    @pytest.mark.skipif(theano.config.floatX == "float32",
-                        reason="Test fails on 32 bit due to linalg issues")
+    @pytest.mark.skipif(
+        theano.config.floatX == "float32",
+        reason="Test fails on 32 bit due to linalg issues",
+    )
     def test_non_blocked(self):
         """Test that samplers correctly create non-blocked compound steps."""
         _, model = simple_2model_continuous()
@@ -318,8 +860,10 @@ def test_non_blocked(self):
             for sampler in self.samplers:
                 assert isinstance(sampler(blocked=False), CompoundStep)
 
-    @pytest.mark.skipif(theano.config.floatX == "float32",
-                        reason="Test fails on 32 bit due to linalg issues")
+    @pytest.mark.skipif(
+        theano.config.floatX == "float32",
+        reason="Test fails on 32 bit due to linalg issues",
+    )
     def test_blocked(self):
         _, model = simple_2model_continuous()
         with model:
@@ -333,50 +877,53 @@ class TestAssignStepMethods(object):
     def test_bernoulli(self):
         """Test bernoulli distribution is assigned binary gibbs metropolis method"""
         with Model() as model:
-            Bernoulli('x', 0.5)
+            Bernoulli("x", 0.5)
             steps = assign_step_methods(model, [])
         assert isinstance(steps, BinaryGibbsMetropolis)
 
     def test_normal(self):
         """Test normal distribution is assigned NUTS method"""
         with Model() as model:
-            Normal('x', 0, 1)
+            Normal("x", 0, 1)
             steps = assign_step_methods(model, [])
         assert isinstance(steps, NUTS)
 
     def test_categorical(self):
         """Test categorical distribution is assigned categorical gibbs metropolis method"""
         with Model() as model:
-            Categorical('x', np.array([0.25, 0.75]))
+            Categorical("x", np.array([0.25, 0.75]))
             steps = assign_step_methods(model, [])
         assert isinstance(steps, BinaryGibbsMetropolis)
         with Model() as model:
-            Categorical('y', np.array([0.25, 0.70, 0.05]))
+            Categorical("y", np.array([0.25, 0.70, 0.05]))
             steps = assign_step_methods(model, [])
         assert isinstance(steps, CategoricalGibbsMetropolis)
 
     def test_binomial(self):
         """Test binomial distribution is assigned metropolis method."""
         with Model() as model:
-            Binomial('x', 10, 0.5)
+            Binomial("x", 10, 0.5)
             steps = assign_step_methods(model, [])
         assert isinstance(steps, Metropolis)
 
     def test_normal_nograd_op(self):
         """Test normal distribution without an implemented gradient is assigned slice method"""
         with Model() as model:
-            x = Normal('x', 0, 1)
+            x = Normal("x", 0, 1)
 
             # a custom Theano Op that does not have a grad:
             is_64 = theano.config.floatX == "float64"
             itypes = [tt.dscalar] if is_64 else [tt.fscalar]
             otypes = [tt.dscalar] if is_64 else [tt.fscalar]
+
             @theano.as_op(itypes, otypes)
             def kill_grad(x):
                 return x
 
             data = np.random.normal(size=(100,))
-            Normal("y", mu=kill_grad(x), sd=1, observed=data.astype(theano.config.floatX))
+            Normal(
+                "y", mu=kill_grad(x), sd=1, observed=data.astype(theano.config.floatX)
+            )
 
             steps = assign_step_methods(model, [])
         assert isinstance(steps, Slice)
@@ -389,7 +936,7 @@ class TestPopulationSamplers(object):
     def test_checks_population_size(self):
         """Test that population samplers check the population size."""
         with Model() as model:
-            n = Normal('n', mu=0, sd=1)
+            n = Normal("n", mu=0, sd=1)
             for stepper in TestPopulationSamplers.steppers:
                 step = stepper()
                 with pytest.raises(ValueError):
@@ -399,83 +946,93 @@ def test_checks_population_size(self):
 
     def test_parallelized_chains_are_random(self):
         with Model() as model:
-            x = Normal('x', 0, 1)
+            x = Normal("x", 0, 1)
             for stepper in TestPopulationSamplers.steppers:
                 step = stepper()
 
-                trace = sample(chains=4, draws=20, tune=0, step=DEMetropolis(),
-                               parallelize=True)
-                samples = np.array(trace.get_values('x', combine=False))[:,5]
+                trace = sample(  # sample with the stepper built for this iteration
+                    chains=4, draws=20, tune=0, step=step, parallelize=True
+                )
+                samples = np.array(trace.get_values("x", combine=False))[:, 5]
 
-                assert len(set(samples)) == 4, 'Parallelized {} ' \
-                    'chains are identical.'.format(stepper)
+                assert (
+                    len(set(samples)) == 4
+                ), "Parallelized {} chains are identical.".format(stepper)
         pass
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestNutsCheckTrace(object):
     def test_multiple_samplers(self, caplog):
         with Model():
-            prob = Beta('prob', alpha=5., beta=3.)
-            Binomial('outcome', n=1, p=prob)
+            prob = Beta("prob", alpha=5.0, beta=3.0)
+            Binomial("outcome", n=1, p=prob)
             caplog.clear()
-            sample(3, tune=2, discard_tuned_samples=False,
-                   n_init=None, chains=1)
+            sample(3, tune=2, discard_tuned_samples=False, n_init=None, chains=1)
             messages = [msg.msg for msg in caplog.records]
-            assert all('boolean index did not' not in msg for msg in messages)
+            assert all("boolean index did not" not in msg for msg in messages)
 
     def test_bad_init(self):
         with Model():
-            HalfNormal('a', sd=1, testval=-1, transform=None)
+            HalfNormal("a", sd=1, testval=-1, transform=None)
             with pytest.raises(ValueError) as error:
                 sample(init=None)
-            error.match('Bad initial')
+            error.match("Bad initial")
 
     def test_linalg(self, caplog):
         with Model():
-            a = Normal('a', shape=2)
+            a = Normal("a", shape=2)
             a = tt.switch(a > 0, np.inf, a)
             b = tt.slinalg.solve(floatX(np.eye(2)), a)
-            Normal('c', mu=b, shape=2)
+            Normal("c", mu=b, shape=2)
             caplog.clear()
             trace = sample(20, init=None, tune=5, chains=2)
             warns = [msg.msg for msg in caplog.records]
-            assert np.any(trace['diverging'])
+            assert np.any(trace["diverging"])
             assert (
-                any('divergence after tuning' in warn
-                    for warn in warns)
-                or
-                any('divergences after tuning' in warn
-                    for warn in warns)
-                or
-                any('only diverging samples' in warn
-                    for warn in warns))
+                any("divergence after tuning" in warn for warn in warns)
+                or any("divergences after tuning" in warn for warn in warns)
+                or any("only diverging samples" in warn for warn in warns)
+            )
 
             with pytest.raises(ValueError) as error:
                 trace.report.raise_ok()
-            error.match('issues during sampling')
+            error.match("issues during sampling")
 
             assert not trace.report.ok
 
     def test_sampler_stats(self):
         with Model() as model:
-            x = Normal('x', mu=0, sd=1)
+            x = Normal("x", mu=0, sd=1)
             trace = sample(draws=10, tune=1, chains=1)
 
         # Assert stats exist and have the correct shape.
         expected_stat_names = {
-            'depth', 'diverging', 'energy', 'energy_error', 'model_logp',
-            'max_energy_error', 'mean_tree_accept', 'step_size',
-            'step_size_bar', 'tree_size', 'tune'
+            "depth",
+            "diverging",
+            "energy",
+            "energy_error",
+            "model_logp",
+            "max_energy_error",
+            "mean_tree_accept",
+            "step_size",
+            "step_size_bar",
+            "tree_size",
+            "tune",
         }
-        assert(trace.stat_names == expected_stat_names)
+        assert trace.stat_names == expected_stat_names
         for varname in trace.stat_names:
-            assert(trace.get_sampler_stats(varname).shape == (10,))
+            assert trace.get_sampler_stats(varname).shape == (10,)
 
         # Assert model logp is computed correctly: computing post-sampling
         # and tracking while sampling should give same results.
-        model_logp_ = np.array([
-            model.logp(trace.point(i, chain=c))
-            for c in trace.chains for i in range(len(trace))
-        ])
-        assert((trace.model_logp == model_logp_).all())
+        model_logp_ = np.array(
+            [
+                model.logp(trace.point(i, chain=c))
+                for c in trace.chains
+                for i in range(len(trace))
+            ]
+        )
+        assert (trace.model_logp == model_logp_).all()
diff --git a/pymc3/tests/test_text_backend.py b/pymc3/tests/test_text_backend.py
index f524d16605..d2dc867a8e 100644
--- a/pymc3/tests/test_text_backend.py
+++ b/pymc3/tests/test_text_backend.py
@@ -6,7 +6,7 @@
 
 
 class TestTextSampling(object):
-    name = 'text-db'
+    name = "text-db"
 
     def test_supports_sampler_stats(self):
         with pm.Model():
@@ -20,53 +20,57 @@ def teardown_method(self):
 
 class TestText0dSampling(bf.SamplingTestCase):
     backend = text.Text
-    name = 'text-db'
+    name = "text-db"
     shape = ()
 
 
 class TestText1dSampling(bf.SamplingTestCase):
     backend = text.Text
-    name = 'text-db'
+    name = "text-db"
     shape = 2
 
 
 class TestText2dSampling(bf.SamplingTestCase):
     backend = text.Text
-    name = 'text-db'
+    name = "text-db"
     shape = (2, 3)
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestText0dSelection(bf.SelectionTestCase):
     backend = text.Text
-    name = 'text-db'
+    name = "text-db"
     shape = ()
 
 
 class TestText1dSelection(bf.SelectionTestCase):
     backend = text.Text
-    name = 'text-db'
+    name = "text-db"
     shape = 2
 
 
 class TestText2dSelection(bf.SelectionTestCase):
     backend = text.Text
-    name = 'text-db'
+    name = "text-db"
     shape = (2, 3)
 
 
 class TestTextDumpLoad(bf.DumpLoadTestCase):
     backend = text.Text
     load_func = staticmethod(text.load)
-    name = 'text-db'
+    name = "text-db"
     shape = (2, 3)
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestTextDumpFunction(bf.BackendEqualityTestCase):
     backend0 = backend1 = ndarray.NDArray
     name0 = None
-    name1 = 'text-db'
+    name1 = "text-db"
     shape = (2, 3)
 
     @classmethod
@@ -81,5 +85,5 @@ class TestNDArrayTextEquality(bf.BackendEqualityTestCase):
     backend0 = ndarray.NDArray
     name0 = None
     backend1 = text.Text
-    name1 = 'text-db'
+    name1 = "text-db"
     shape = (2, 3)
diff --git a/pymc3/tests/test_theanof.py b/pymc3/tests/test_theanof.py
index 4a681a2a00..62f4593534 100644
--- a/pymc3/tests/test_theanof.py
+++ b/pymc3/tests/test_theanof.py
@@ -9,20 +9,21 @@
 class TestSetTheanoConfig(object):
     def test_invalid_key(self):
         with pytest.raises(ValueError) as e:
-            set_theano_conf({'bad_key': True})
-        e.match('Unknown')
+            set_theano_conf({"bad_key": True})
+        e.match("Unknown")
 
     def test_restore_when_bad_key(self):
-        with theano.configparser.change_flags(compute_test_value='off'):
+        with theano.configparser.change_flags(compute_test_value="off"):
             with pytest.raises(ValueError):
                 conf = collections.OrderedDict(
-                    [('compute_test_value', 'raise'), ('bad_key', True)])
+                    [("compute_test_value", "raise"), ("bad_key", True)]
+                )
                 set_theano_conf(conf)
-            assert theano.config.compute_test_value == 'off'
+            assert theano.config.compute_test_value == "off"
 
     def test_restore(self):
-        with theano.configparser.change_flags(compute_test_value='off'):
-            conf = set_theano_conf({'compute_test_value': 'raise'})
-            assert conf == {'compute_test_value': 'off'}
+        with theano.configparser.change_flags(compute_test_value="off"):
+            conf = set_theano_conf({"compute_test_value": "raise"})
+            assert conf == {"compute_test_value": "off"}
             conf = set_theano_conf(conf)
-            assert conf == {'compute_test_value': 'raise'}
+            assert conf == {"compute_test_value": "raise"}
diff --git a/pymc3/tests/test_tracetab.py b/pymc3/tests/test_tracetab.py
index 2d1380719d..872289d5f0 100644
--- a/pymc3/tests/test_tracetab.py
+++ b/pymc3/tests/test_tracetab.py
@@ -7,7 +7,7 @@
 
 class TestTraceToDf(bf.ModelBackendSampledTestCase):
     backend = ndarray.NDArray
-    name = 'text-db'
+    name = "text-db"
     shape = (2, 3)
 
     def test_trace_to_dataframe(self):
@@ -22,9 +22,9 @@ def test_trace_to_dataframe(self):
             # `shape`.
             if vararr.shape[1:] != self.shape:
                 continue
-            npt.assert_equal(vararr[:, 0, 0], df[varname + '__0_0'].values)
-            npt.assert_equal(vararr[:, 1, 0], df[varname + '__1_0'].values)
-            npt.assert_equal(vararr[:, 1, 2], df[varname + '__1_2'].values)
+            npt.assert_equal(vararr[:, 0, 0], df[varname + "__0_0"].values)
+            npt.assert_equal(vararr[:, 1, 0], df[varname + "__1_0"].values)
+            npt.assert_equal(vararr[:, 1, 2], df[varname + "__1_2"].values)
             checked = True
         assert checked
 
@@ -40,38 +40,37 @@ def test_trace_to_dataframe_chain_arg(self):
             # `shape`.
             if vararr.shape[1:] != self.shape:
                 continue
-            npt.assert_equal(vararr[:, 0, 0], df[varname + '__0_0'].values)
-            npt.assert_equal(vararr[:, 1, 0], df[varname + '__1_0'].values)
-            npt.assert_equal(vararr[:, 1, 2], df[varname + '__1_2'].values)
+            npt.assert_equal(vararr[:, 0, 0], df[varname + "__0_0"].values)
+            npt.assert_equal(vararr[:, 1, 0], df[varname + "__1_0"].values)
+            npt.assert_equal(vararr[:, 1, 2], df[varname + "__1_2"].values)
             checked = True
         assert checked
 
 
 def test_create_flat_names_0d():
     shape = ()
-    result = ttab.create_flat_names('x', shape)
-    expected = ['x']
+    result = ttab.create_flat_names("x", shape)
+    expected = ["x"]
     assert result == expected
     assert ttab._create_shape(result) == shape
 
 
 def test_create_flat_names_1d():
-    shape = 2,
-    result = ttab.create_flat_names('x', shape)
-    expected = ['x__0', 'x__1']
+    shape = (2,)
+    result = ttab.create_flat_names("x", shape)
+    expected = ["x__0", "x__1"]
     assert result == expected
     assert ttab._create_shape(result) == shape
 
 
 def test_create_flat_names_2d():
     shape = 2, 3
-    result = ttab.create_flat_names('x', shape)
-    expected = ['x__0_0', 'x__0_1', 'x__0_2',
-                'x__1_0', 'x__1_1', 'x__1_2']
+    result = ttab.create_flat_names("x", shape)
+    expected = ["x__0_0", "x__0_1", "x__0_2", "x__1_0", "x__1_1", "x__1_2"]
     assert result == expected
     assert ttab._create_shape(result) == shape
 
 
 def test_create_flat_names_3d():
     shape = 2, 3, 4
-    assert ttab._create_shape(ttab.create_flat_names('x', shape)) == shape
+    assert ttab._create_shape(ttab.create_flat_names("x", shape)) == shape
diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py
index 5b7dc58345..e525b3726d 100644
--- a/pymc3/tests/test_transforms.py
+++ b/pymc3/tests/test_transforms.py
@@ -6,20 +6,29 @@
 import theano
 import theano.tensor as tt
 from .helpers import SeededTest
-from .test_distributions import (Simplex, Rplusbig, Rminusbig,
-                                 Unit, R, Vector, MultiSimplex,
-                                 Circ, SortedVector, UnitSortedVector)
+from .test_distributions import (
+    Simplex,
+    Rplusbig,
+    Rminusbig,
+    Unit,
+    R,
+    Vector,
+    MultiSimplex,
+    Circ,
+    SortedVector,
+    UnitSortedVector,
+)
 from .checks import close_to, close_to_logical
 from ..theanof import jacobian
 
 
 # some transforms (stick breaking) require additon of small slack in order to be numerically
 # stable. The minimal addable slack for float32 is higher thus we need to be less strict
-tol = 1e-7 if theano.config.floatX == 'float64' else 1e-6
+tol = 1e-7 if theano.config.floatX == "float64" else 1e-6
 
 
 def check_transform(transform, domain, constructor=tt.dscalar, test=0):
-    x = constructor('x')
+    x = constructor("x")
     x.tag.test_value = test
     # test forward and forward_val
     forward_f = theano.function([x], transform.forward(x))
@@ -35,18 +44,21 @@ def check_vector_transform(transform, domain):
 
 
 def get_values(transform, domain=R, constructor=tt.dscalar, test=0):
-    x = constructor('x')
+    x = constructor("x")
     x.tag.test_value = test
     f = theano.function([x], transform.backward(x))
     return np.array([f(val) for val in domain.vals])
 
 
-def check_jacobian_det(transform, domain,
-                       constructor=tt.dscalar,
-                       test=0,
-                       make_comparable=None,
-                       elemwise=False):
-    y = constructor('y')
+def check_jacobian_det(
+    transform,
+    domain,
+    constructor=tt.dscalar,
+    test=0,
+    make_comparable=None,
+    elemwise=False,
+):
+    y = constructor("y")
     y.tag.test_value = test
 
     x = transform.backward(y)
@@ -61,49 +73,52 @@ def check_jacobian_det(transform, domain,
     # ljd = log jacobian det
     actual_ljd = theano.function([y], jac)
 
-    computed_ljd = theano.function([y], tt.as_tensor_variable(
-        transform.jacobian_det(y)), on_unused_input='ignore')
+    computed_ljd = theano.function(
+        [y], tt.as_tensor_variable(transform.jacobian_det(y)), on_unused_input="ignore"
+    )
 
     for yval in domain.vals:
-        close_to(
-            actual_ljd(yval),
-            computed_ljd(yval), tol)
+        close_to(actual_ljd(yval), computed_ljd(yval), tol)
 
 
 def test_simplex():
     check_vector_transform(tr.stick_breaking, Simplex(2))
     check_vector_transform(tr.stick_breaking, Simplex(4))
 
-    check_transform(tr.stick_breaking, MultiSimplex(
-        3, 2), constructor=tt.dmatrix, test=np.zeros((2, 2)))
+    check_transform(
+        tr.stick_breaking,
+        MultiSimplex(3, 2),
+        constructor=tt.dmatrix,
+        test=np.zeros((2, 2)),
+    )
 
 
 def test_simplex_bounds():
-    vals = get_values(tr.stick_breaking, Vector(R, 2),
-                      tt.dvector, np.array([0, 0]))
+    vals = get_values(tr.stick_breaking, Vector(R, 2), tt.dvector, np.array([0, 0]))
 
     close_to(vals.sum(axis=1), 1, tol)
     close_to_logical(vals > 0, True, tol)
     close_to_logical(vals < 1, True, tol)
 
-    check_jacobian_det(tr.stick_breaking, Vector(
-        R, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1])
+    check_jacobian_det(
+        tr.stick_breaking, Vector(R, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1]
+    )
 
 
 def test_sum_to_1():
     check_vector_transform(tr.sum_to_1, Simplex(2))
     check_vector_transform(tr.sum_to_1, Simplex(4))
 
-    check_jacobian_det(tr.sum_to_1, Vector(Unit, 2),
-                       tt.dvector, np.array([0, 0]), lambda x: x[:-1])
+    check_jacobian_det(
+        tr.sum_to_1, Vector(Unit, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1]
+    )
 
 
 def test_log():
     check_transform(tr.log, Rplusbig)
 
     check_jacobian_det(tr.log, Rplusbig, elemwise=True)
-    check_jacobian_det(tr.log, Vector(Rplusbig, 2),
-                       tt.dvector, [0, 0], elemwise=True)
+    check_jacobian_det(tr.log, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True)
 
     vals = get_values(tr.log)
     close_to_logical(vals > 0, True, tol)
@@ -113,8 +128,9 @@ def test_log_exp_m1():
     check_transform(tr.log_exp_m1, Rplusbig)
 
     check_jacobian_det(tr.log_exp_m1, Rplusbig, elemwise=True)
-    check_jacobian_det(tr.log_exp_m1, Vector(Rplusbig, 2),
-                       tt.dvector, [0, 0], elemwise=True)
+    check_jacobian_det(
+        tr.log_exp_m1, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True
+    )
 
     vals = get_values(tr.log_exp_m1)
     close_to_logical(vals > 0, True, tol)
@@ -124,8 +140,9 @@ def test_logodds():
     check_transform(tr.logodds, Unit)
 
     check_jacobian_det(tr.logodds, Unit, elemwise=True)
-    check_jacobian_det(tr.logodds, Vector(Unit, 2),
-                       tt.dvector, [.5, .5], elemwise=True)
+    check_jacobian_det(
+        tr.logodds, Vector(Unit, 2), tt.dvector, [0.5, 0.5], elemwise=True
+    )
 
     vals = get_values(tr.logodds)
     close_to_logical(vals > 0, True, tol)
@@ -137,8 +154,7 @@ def test_lowerbound():
     check_transform(trans, Rplusbig)
 
     check_jacobian_det(trans, Rplusbig, elemwise=True)
-    check_jacobian_det(trans, Vector(Rplusbig, 2),
-                       tt.dvector, [0, 0], elemwise=True)
+    check_jacobian_det(trans, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True)
 
     vals = get_values(trans)
     close_to_logical(vals > 0, True, tol)
@@ -149,15 +165,14 @@ def test_upperbound():
     check_transform(trans, Rminusbig)
 
     check_jacobian_det(trans, Rminusbig, elemwise=True)
-    check_jacobian_det(trans, Vector(Rminusbig, 2),
-                       tt.dvector, [-1, -1], elemwise=True)
+    check_jacobian_det(trans, Vector(Rminusbig, 2), tt.dvector, [-1, -1], elemwise=True)
 
     vals = get_values(trans)
     close_to_logical(vals < 0, True, tol)
 
 
 def test_interval():
-    for a, b in [(-4, 5.5), (.1, .7), (-10, 4.3)]:
+    for a, b in [(-4, 5.5), (0.1, 0.7), (-10, 4.3)]:
         domain = Unit * np.float64(b - a) + np.float64(a)
         trans = tr.interval(a, b)
         check_transform(trans, domain)
@@ -185,24 +200,26 @@ def test_circular():
 def test_ordered():
     check_vector_transform(tr.ordered, SortedVector(6))
 
-    check_jacobian_det(tr.ordered, Vector(R, 2),
-                       tt.dvector, np.array([0, 0]), elemwise=False)
+    check_jacobian_det(
+        tr.ordered, Vector(R, 2), tt.dvector, np.array([0, 0]), elemwise=False
+    )
 
-    vals = get_values(tr.ordered, Vector(R, 3),
-                      tt.dvector, np.zeros(3))
+    vals = get_values(tr.ordered, Vector(R, 3), tt.dvector, np.zeros(3))
     close_to_logical(np.diff(vals) >= 0, True, tol)
 
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 def test_chain():
     chain_tranf = tr.Chain([tr.logodds, tr.ordered])
     check_vector_transform(chain_tranf, UnitSortedVector(3))
 
-    check_jacobian_det(chain_tranf, Vector(R, 4),
-                       tt.dvector, np.zeros(4), elemwise=False)
+    check_jacobian_det(
+        chain_tranf, Vector(R, 4), tt.dvector, np.zeros(4), elemwise=False
+    )
 
-    vals = get_values(chain_tranf, Vector(R, 5),
-                      tt.dvector, np.zeros(5))
+    vals = get_values(chain_tranf, Vector(R, 5), tt.dvector, np.zeros(5))
     close_to_logical(np.diff(vals) >= 0, True, tol)
 
 
@@ -211,7 +228,7 @@ def build_model(self, distfam, params, shape, transform, testval=None):
         if testval is not None:
             testval = pm.floatX(testval)
         with pm.Model() as m:
-            distfam('x', shape=shape, transform=transform, testval=testval, **params)
+            distfam("x", shape=shape, transform=transform, testval=testval, **params)
         return m
 
     def check_transform_elementwise_logp(self, model):
@@ -234,7 +251,7 @@ def check_transform_elementwise_logp(self, model):
     def check_vectortransform_elementwise_logp(self, model, vect_opt=0):
         x0 = model.deterministics[0]
         x = model.free_RVs[0]
-        assert (x.ndim-1) == x.logp_elemwiset.ndim
+        assert (x.ndim - 1) == x.logp_elemwiset.ndim
 
         pt = model.test_point
         array = np.random.randn(*pt[x.name].shape)
@@ -252,145 +269,186 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0):
 
         close_to(x.logp_elemwise(pt), elementwiselogp.eval(), tol)
 
-    @pytest.mark.parametrize('sd,shape', [
-        (2.5, 2),
-        (5., (2, 3)),
-        (np.ones(3)*10., (4, 3)),
-    ])
+    @pytest.mark.parametrize(
+        "sd,shape", [(2.5, 2), (5.0, (2, 3)), (np.ones(3) * 10.0, (4, 3))]
+    )
     def test_half_normal(self, sd, shape):
-        model = self.build_model(pm.HalfNormal, {'sd': sd}, shape=shape, transform=tr.log)
+        model = self.build_model(
+            pm.HalfNormal, {"sd": sd}, shape=shape, transform=tr.log
+        )
         self.check_transform_elementwise_logp(model)
 
-    @pytest.mark.parametrize('lam,shape', [
-        (2.5, 2),
-        (5., (2, 3)),
-        (np.ones(3), (4, 3))
-    ])
+    @pytest.mark.parametrize(
+        "lam,shape", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))]
+    )
     def test_exponential(self, lam, shape):
-        model = self.build_model(pm.Exponential, {'lam': lam}, shape=shape, transform=tr.log)
+        model = self.build_model(
+            pm.Exponential, {"lam": lam}, shape=shape, transform=tr.log
+        )
         self.check_transform_elementwise_logp(model)
 
-    @pytest.mark.parametrize('a,b,shape', [
-        (1., 1., 2),
-        (.5, .5, (2, 3)),
-        (np.ones(3), np.ones(3), (4, 3)),
-    ])
+    @pytest.mark.parametrize(
+        "a,b,shape",
+        [(1.0, 1.0, 2), (0.5, 0.5, (2, 3)), (np.ones(3), np.ones(3), (4, 3))],
+    )
     def test_beta(self, a, b, shape):
-        model = self.build_model(pm.Beta, {'alpha': a, 'beta': b}, shape=shape, transform=tr.logodds)
+        model = self.build_model(
+            pm.Beta, {"alpha": a, "beta": b}, shape=shape, transform=tr.logodds
+        )
         self.check_transform_elementwise_logp(model)
 
-    @pytest.mark.parametrize('lower,upper,shape', [
-        (0., 1., 2),
-        (.5, 5.5, (2, 3)),
-        (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))
-    ])
+    @pytest.mark.parametrize(
+        "lower,upper,shape",
+        [
+            (0.0, 1.0, 2),
+            (0.5, 5.5, (2, 3)),
+            (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)),
+        ],
+    )
     def test_uniform(self, lower, upper, shape):
         interval = tr.Interval(lower, upper)
-        model = self.build_model(pm.Uniform, {'lower': lower, 'upper': upper},
-                                 shape=shape, transform=interval)
+        model = self.build_model(
+            pm.Uniform,
+            {"lower": lower, "upper": upper},
+            shape=shape,
+            transform=interval,
+        )
         self.check_transform_elementwise_logp(model)
 
-    @pytest.mark.parametrize('mu,kappa,shape', [
-        (0., 1., 2),
-        (-.5, 5.5, (2, 3)),
-        (np.zeros(3), np.ones(3), (4, 3))
-    ])
+    @pytest.mark.parametrize(
+        "mu,kappa,shape",
+        [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))],
+    )
     def test_vonmises(self, mu, kappa, shape):
-        model = self.build_model(pm.VonMises, {'mu': mu, 'kappa': kappa}, shape=shape, transform=tr.circular)
+        model = self.build_model(
+            pm.VonMises, {"mu": mu, "kappa": kappa}, shape=shape, transform=tr.circular
+        )
         self.check_transform_elementwise_logp(model)
 
-    @pytest.mark.parametrize('a,shape', [
-        (np.ones(2), 2),
-        (np.ones((2, 3))*.5, (2, 3)),
-        (np.ones(3), (4, 3))
-    ])
+    @pytest.mark.parametrize(
+        "a,shape",
+        [(np.ones(2), 2), (np.ones((2, 3)) * 0.5, (2, 3)), (np.ones(3), (4, 3))],
+    )
     def test_dirichlet(self, a, shape):
-        model = self.build_model(pm.Dirichlet, {'a': a}, shape=shape, transform=tr.stick_breaking)
+        model = self.build_model(
+            pm.Dirichlet, {"a": a}, shape=shape, transform=tr.stick_breaking
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=1)
 
     def test_normal_ordered(self):
-        model = self.build_model(pm.Normal, {'mu': 0., 'sd': 1.}, shape=3,
-                                 testval=np.asarray([-1., 1., 4.]),
-                                 transform=tr.ordered)
+        model = self.build_model(
+            pm.Normal,
+            {"mu": 0.0, "sd": 1.0},
+            shape=3,
+            testval=np.asarray([-1.0, 1.0, 4.0]),
+            transform=tr.ordered,
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=0)
 
-    @pytest.mark.parametrize('sd,shape', [
-        (2.5, (2,)),
-        (np.ones(3), (4, 3)),
-    ])
-    @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.parametrize("sd,shape", [(2.5, (2,)), (np.ones(3), (4, 3))])
+    @pytest.mark.xfail(
+        condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+    )
     def test_half_normal_ordered(self, sd, shape):
         testval = np.sort(np.abs(np.random.randn(*shape)))
-        model = self.build_model(pm.HalfNormal, {'sd': sd}, shape=shape,
-                                 testval=testval,
-                                 transform=tr.Chain([tr.log, tr.ordered]))
+        model = self.build_model(
+            pm.HalfNormal,
+            {"sd": sd},
+            shape=shape,
+            testval=testval,
+            transform=tr.Chain([tr.log, tr.ordered]),
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=0)
 
-    @pytest.mark.parametrize('lam,shape', [
-        (2.5, (2,)),
-        (np.ones(3), (4, 3))
-    ])
+    @pytest.mark.parametrize("lam,shape", [(2.5, (2,)), (np.ones(3), (4, 3))])
     def test_exponential_ordered(self, lam, shape):
         testval = np.sort(np.abs(np.random.randn(*shape)))
-        model = self.build_model(pm.Exponential, {'lam': lam}, shape=shape,
-                                 testval=testval,
-                                 transform=tr.Chain([tr.log, tr.ordered]))
+        model = self.build_model(
+            pm.Exponential,
+            {"lam": lam},
+            shape=shape,
+            testval=testval,
+            transform=tr.Chain([tr.log, tr.ordered]),
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=0)
 
-    @pytest.mark.parametrize('a,b,shape', [
-        (1., 1., (2,)),
-        (np.ones(3), np.ones(3), (4, 3)),
-    ])
+    @pytest.mark.parametrize(
+        "a,b,shape", [(1.0, 1.0, (2,)), (np.ones(3), np.ones(3), (4, 3))]
+    )
     def test_beta_ordered(self, a, b, shape):
         testval = np.sort(np.abs(np.random.rand(*shape)))
-        model = self.build_model(pm.Beta, {'alpha': a, 'beta': b}, shape=shape,
-                                 testval=testval,
-                                 transform=tr.Chain([tr.logodds, tr.ordered]))
+        model = self.build_model(
+            pm.Beta,
+            {"alpha": a, "beta": b},
+            shape=shape,
+            testval=testval,
+            transform=tr.Chain([tr.logodds, tr.ordered]),
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=0)
 
-    @pytest.mark.parametrize('lower,upper,shape', [
-        (0., 1., (2,)),
-        (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))
-    ])
+    @pytest.mark.parametrize(
+        "lower,upper,shape",
+        [(0.0, 1.0, (2,)), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))],
+    )
     def test_uniform_ordered(self, lower, upper, shape):
         interval = tr.Interval(lower, upper)
         testval = np.sort(np.abs(np.random.rand(*shape)))
-        model = self.build_model(pm.Uniform, {'lower': lower, 'upper': upper}, shape=shape,
-                                 testval=testval,
-                                 transform=tr.Chain([interval, tr.ordered]))
+        model = self.build_model(
+            pm.Uniform,
+            {"lower": lower, "upper": upper},
+            shape=shape,
+            testval=testval,
+            transform=tr.Chain([interval, tr.ordered]),
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=0)
 
-    @pytest.mark.parametrize('mu,kappa,shape', [
-        (0., 1., (2,)),
-        (np.zeros(3), np.ones(3), (4, 3))
-    ])
+    @pytest.mark.parametrize(
+        "mu,kappa,shape", [(0.0, 1.0, (2,)), (np.zeros(3), np.ones(3), (4, 3))]
+    )
     def test_vonmises_ordered(self, mu, kappa, shape):
         testval = np.sort(np.abs(np.random.rand(*shape)))
-        model = self.build_model(pm.VonMises, {'mu': mu, 'kappa': kappa}, shape=shape,
-                                 testval=testval,
-                                 transform=tr.Chain([tr.circular, tr.ordered]))
+        model = self.build_model(
+            pm.VonMises,
+            {"mu": mu, "kappa": kappa},
+            shape=shape,
+            testval=testval,
+            transform=tr.Chain([tr.circular, tr.ordered]),
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=0)
 
-    @pytest.mark.parametrize('lower,upper,shape,transform', [
-        (0., 1., (2,), tr.stick_breaking),
-        (.5, 5.5, (2, 3), tr.stick_breaking),
-        (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds]))
-    ])
+    @pytest.mark.parametrize(
+        "lower,upper,shape,transform",
+        [
+            (0.0, 1.0, (2,), tr.stick_breaking),
+            (0.5, 5.5, (2, 3), tr.stick_breaking),
+            (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds])),
+        ],
+    )
     def test_uniform_other(self, lower, upper, shape, transform):
-        testval = np.ones(shape)/shape[-1]
-        model = self.build_model(pm.Uniform, {'lower': lower, 'upper': upper},
-                                 shape=shape,
-                                 testval=testval,
-                                 transform=transform)
+        testval = np.ones(shape) / shape[-1]
+        model = self.build_model(
+            pm.Uniform,
+            {"lower": lower, "upper": upper},
+            shape=shape,
+            testval=testval,
+            transform=transform,
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=0)
 
-    @pytest.mark.parametrize('mu,cov,shape', [
-        (np.zeros(2), np.diag(np.ones(2)), (2,)),
-        (np.zeros(3), np.diag(np.ones(3)), (4, 3)),
-    ])
+    @pytest.mark.parametrize(
+        "mu,cov,shape",
+        [
+            (np.zeros(2), np.diag(np.ones(2)), (2,)),
+            (np.zeros(3), np.diag(np.ones(3)), (4, 3)),
+        ],
+    )
     def test_mvnormal_ordered(self, mu, cov, shape):
         testval = np.sort(np.random.randn(*shape))
-        model = self.build_model(pm.MvNormal, {'mu': mu, 'cov': cov}, shape=shape,
-                                 testval=testval,
-                                 transform=tr.ordered)
+        model = self.build_model(
+            pm.MvNormal,
+            {"mu": mu, "cov": cov},
+            shape=shape,
+            testval=testval,
+            transform=tr.ordered,
+        )
         self.check_vectortransform_elementwise_logp(model, vect_opt=1)
diff --git a/pymc3/tests/test_tuning.py b/pymc3/tests/test_tuning.py
index 9da803d693..9f6769fc25 100644
--- a/pymc3/tests/test_tuning.py
+++ b/pymc3/tests/test_tuning.py
@@ -5,7 +5,7 @@
 
 
 def test_adjust_precision():
-    a = np.array([-10, -.01, 0, 10, 1e300, -inf, inf])
+    a = np.array([-10, -0.01, 0, 10, 1e300, -inf, inf])
     a1 = scaling.adjust_precision(a)
     assert all((a1 > 0) & (a1 < 1e200))
 
@@ -24,7 +24,7 @@ def test_mle_jacobian():
     with model:
         map_estimate = find_MAP(method="BFGS", model=model)
 
-    rtol = 1E-5  # this rtol should work on both floatX precisions
+    rtol = 1e-5  # this rtol should work on both floatX precisions
     np.testing.assert_allclose(map_estimate["mu_i"], truth, rtol=rtol)
 
     start, model, _ = models.simple_normal(bounded_prior=True)
diff --git a/pymc3/tests/test_types.py b/pymc3/tests/test_types.py
index e59fce37c8..15ae8452b7 100644
--- a/pymc3/tests/test_types.py
+++ b/pymc3/tests/test_types.py
@@ -22,27 +22,29 @@ def teardown_method(self):
         # restore theano config
         theano.config = self.theano_config
 
-    @change_flags({'floatX': 'float64', 'warn_float64': 'ignore'})
+    @change_flags({"floatX": "float64", "warn_float64": "ignore"})
     def test_float64(self):
         with Model() as model:
-            x = Normal('x', testval=np.array(1., dtype='float64'))
-            obs = Normal('obs', mu=x, sd=1., observed=np.random.randn(5))
+            x = Normal("x", testval=np.array(1.0, dtype="float64"))
+            obs = Normal("obs", mu=x, sd=1.0, observed=np.random.randn(5))
 
-        assert x.dtype == 'float64'
-        assert obs.dtype == 'float64'
+        assert x.dtype == "float64"
+        assert obs.dtype == "float64"
 
         for sampler in self.samplers:
             with model:
                 sample(10, sampler())
 
-    @change_flags({'floatX': 'float32', 'warn_float64': 'warn'})
+    @change_flags({"floatX": "float32", "warn_float64": "warn"})
     def test_float32(self):
         with Model() as model:
-            x = Normal('x', testval=np.array(1., dtype='float32'))
-            obs = Normal('obs', mu=x, sd=1., observed=np.random.randn(5).astype('float32'))
+            x = Normal("x", testval=np.array(1.0, dtype="float32"))
+            obs = Normal(
+                "obs", mu=x, sd=1.0, observed=np.random.randn(5).astype("float32")
+            )
 
-        assert x.dtype == 'float32'
-        assert obs.dtype == 'float32'
+        assert x.dtype == "float32"
+        assert obs.dtype == "float32"
 
         for sampler in self.samplers:
             with model:
diff --git a/pymc3/tests/test_updates.py b/pymc3/tests/test_updates.py
index 6b0e641240..78df9270c0 100644
--- a/pymc3/tests/test_updates.py
+++ b/pymc3/tests/test_updates.py
@@ -11,50 +11,63 @@
     adadelta,
     adam,
     adamax,
-    adagrad_window
+    adagrad_window,
 )
 
-_a = theano.shared(1.)
-_b = _a*2
+_a = theano.shared(1.0)
+_b = _a * 2
 
-_m = theano.shared(np.empty((10, ), theano.config.floatX))
+_m = theano.shared(np.empty((10,), theano.config.floatX))
 _n = _m.sum()
 _m2 = theano.shared(np.empty((10, 10, 10), theano.config.floatX))
 _n2 = _b + _n + _m2.sum()
 
 
 @pytest.mark.parametrize(
-    'opt',
-    [sgd, momentum, nesterov_momentum,
-     adagrad, rmsprop, adadelta, adam,
-     adamax, adagrad_window],
-    ids=['sgd', 'momentum', 'nesterov_momentum',
-         'adagrad', 'rmsprop', 'adadelta', 'adam',
-         'adamax', 'adagrad_window']
+    "opt",
+    [
+        sgd,
+        momentum,
+        nesterov_momentum,
+        adagrad,
+        rmsprop,
+        adadelta,
+        adam,
+        adamax,
+        adagrad_window,
+    ],
+    ids=[
+        "sgd",
+        "momentum",
+        "nesterov_momentum",
+        "adagrad",
+        "rmsprop",
+        "adadelta",
+        "adam",
+        "adamax",
+        "adagrad_window",
+    ],
 )
 @pytest.mark.parametrize(
-    'getter',
-    [lambda t: t,              # all params -> ok
-     lambda t: (None, t[1]),   # missing loss -> fail
-     lambda t: (t[0], None),   # missing params -> fail
-     lambda t: (None, None)],  # all missing -> partial
-    ids=['all_params',
-         'missing_loss',
-         'missing_params',
-         'all_missing']
+    "getter",
+    [
+        lambda t: t,  # all params -> ok
+        lambda t: (None, t[1]),  # missing loss -> fail
+        lambda t: (t[0], None),  # missing params -> fail
+        lambda t: (None, None),  # all missing -> partial
+    ],
+    ids=["all_params", "missing_loss", "missing_params", "all_missing"],
 )
 @pytest.mark.parametrize(
-    'kwargs',
-    [dict(), dict(learning_rate=1e-2)],
-    ids=['without_args', 'with_args']
+    "kwargs", [dict(), dict(learning_rate=1e-2)], ids=["without_args", "with_args"]
 )
 @pytest.mark.parametrize(
-    'loss_and_params',
+    "loss_and_params",
     [(_b, [_a]), (_n, [_m]), (_n2, [_a, _m, _m2])],
-    ids=['scalar', 'matrix', 'mixed']
+    ids=["scalar", "matrix", "mixed"],
 )
 def test_updates_fast(opt, loss_and_params, kwargs, getter):
-    with change_flags(compute_test_value='ignore'):
+    with change_flags(compute_test_value="ignore"):
         loss, param = getter(loss_and_params)
         args = dict()
         args.update(**kwargs)
diff --git a/pymc3/tests/test_util.py b/pymc3/tests/test_util.py
index 52e567cc75..15fc66ad31 100644
--- a/pymc3/tests/test_util.py
+++ b/pymc3/tests/test_util.py
@@ -8,11 +8,8 @@
 
 
 class TestTransformName(object):
-    cases = [
-        ('var', 'var_test__'),
-        ('var_test_', 'var_test__test__')
-    ]
-    transform_name = 'test'
+    cases = [("var", "var_test__"), ("var_test_", "var_test__test__")]
+    transform_name = "test"
 
     def test_get_transformed_name(self):
         test_transform = Transform()
@@ -32,55 +29,51 @@ def test_get_untransformed_name(self):
                 pm.util.get_untransformed_name(name)
 
 
-
 class TestUpdateStartVals(SeededTest):
     def setup_method(self):
         super(TestUpdateStartVals, self).setup_method()
-    
+
     def test_soft_update_all_present(self):
-        start = {'a': 1, 'b': 2}
-        test_point = {'a': 3, 'b': 4}
+        start = {"a": 1, "b": 2}
+        test_point = {"a": 3, "b": 4}
         pm.util.update_start_vals(start, test_point, model=None)
-        assert start == {'a': 1, 'b': 2}
-    
+        assert start == {"a": 1, "b": 2}
+
     def test_soft_update_one_missing(self):
-        start = {'a': 1, }
-        test_point = {'a': 3, 'b': 4}
+        start = {"a": 1}
+        test_point = {"a": 3, "b": 4}
         pm.util.update_start_vals(start, test_point, model=None)
-        assert start == {'a': 1, 'b': 4}
-    
+        assert start == {"a": 1, "b": 4}
+
     def test_soft_update_empty(self):
         start = {}
-        test_point = {'a': 3, 'b': 4}
+        test_point = {"a": 3, "b": 4}
         pm.util.update_start_vals(start, test_point, model=None)
         assert start == test_point
-    
+
     def test_soft_update_transformed(self):
         with pm.Model() as model:
-            pm.Exponential('a', 1)
-        start = {'a': 2.}
-        test_point = {'a_log__': 0}
+            pm.Exponential("a", 1)
+        start = {"a": 2.0}
+        test_point = {"a_log__": 0}
         pm.util.update_start_vals(start, test_point, model)
-        assert_almost_equal(np.exp(start['a_log__']), start['a'])
-    
+        assert_almost_equal(np.exp(start["a_log__"]), start["a"])
+
     def test_soft_update_parent(self):
         with pm.Model() as model:
-            a = pm.Uniform('a', lower=0., upper=1.)
-            b = pm.Uniform('b', lower=2., upper=3.)
-            pm.Uniform('lower', lower=a, upper=3.)
-            pm.Uniform('upper', lower=0., upper=b)
-            pm.Uniform('interv', lower=a, upper=b)
-        
-        start = {'a': .3, 'b': 2.1, 'lower': 1.4, 'upper': 1.4, 'interv':1.4}
-        test_point = {'lower_interval__': -0.3746934494414109,
-            'upper_interval__': 0.693147180559945,
-                'interv_interval__': 0.4519851237430569}
-        pm.util.update_start_vals(start, model.test_point, model)
-        assert_almost_equal(start['lower_interval__'],
-                            test_point['lower_interval__'])
-        assert_almost_equal(start['upper_interval__'],
-                            test_point['upper_interval__'])
-        assert_almost_equal(start['interv_interval__'],
-                            test_point['interv_interval__'])
-
+            a = pm.Uniform("a", lower=0.0, upper=1.0)
+            b = pm.Uniform("b", lower=2.0, upper=3.0)
+            pm.Uniform("lower", lower=a, upper=3.0)
+            pm.Uniform("upper", lower=0.0, upper=b)
+            pm.Uniform("interv", lower=a, upper=b)
 
+        start = {"a": 0.3, "b": 2.1, "lower": 1.4, "upper": 1.4, "interv": 1.4}
+        test_point = {
+            "lower_interval__": -0.3746934494414109,
+            "upper_interval__": 0.693147180559945,
+            "interv_interval__": 0.4519851237430569,
+        }
+        pm.util.update_start_vals(start, model.test_point, model)
+        assert_almost_equal(start["lower_interval__"], test_point["lower_interval__"])
+        assert_almost_equal(start["upper_interval__"], test_point["upper_interval__"])
+        assert_almost_equal(start["interv_interval__"], test_point["interv_interval__"])
diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py
index 867a129579..f16e49d5d7 100644
--- a/pymc3/tests/test_variational_inference.py
+++ b/pymc3/tests/test_variational_inference.py
@@ -11,42 +11,34 @@
 import pymc3.util
 from pymc3.theanof import change_flags
 from pymc3.variational.approximations import (
-    MeanFieldGroup, FullRankGroup,
-    NormalizingFlowGroup, EmpiricalGroup,
-    MeanField, FullRank, NormalizingFlow, Empirical
-)
-from pymc3.variational.inference import (
-    ADVI, FullRankADVI, SVGD, NFVI, ASVGD,
-    fit
+    MeanFieldGroup,
+    FullRankGroup,
+    NormalizingFlowGroup,
+    EmpiricalGroup,
+    MeanField,
+    FullRank,
+    NormalizingFlow,
+    Empirical,
 )
+from pymc3.variational.inference import ADVI, FullRankADVI, SVGD, NFVI, ASVGD, fit
 from pymc3.variational import flows
 from pymc3.variational.opvi import Approximation, Group
 from pymc3.variational import opvi
 from . import models
 from .helpers import not_raises
 
-pytestmark = pytest.mark.usefixtures(
-    'strict_float32',
-    'seeded_test'
-)
+pytestmark = pytest.mark.usefixtures("strict_float32", "seeded_test")
 
 
-@pytest.mark.parametrize(
-    'diff',
-    [
-        'relative',
-        'absolute'
-    ]
-)
-@pytest.mark.parametrize(
-    'ord',
-    [1, 2, np.inf]
-)
+@pytest.mark.parametrize("diff", ["relative", "absolute"])
+@pytest.mark.parametrize("ord", [1, 2, np.inf])
 def test_callbacks_convergence(diff, ord):
-    cb = pm.variational.callbacks.CheckParametersConvergence(every=1, diff=diff, ord=ord)
+    cb = pm.variational.callbacks.CheckParametersConvergence(
+        every=1, diff=diff, ord=ord
+    )
 
     class _approx:
-        params = (theano.shared(np.asarray([1, 2, 3])), )
+        params = (theano.shared(np.asarray([1, 2, 3])),)
 
     approx = _approx()
 
@@ -57,55 +49,68 @@ class _approx:
 
 def test_tracker_callback():
     import time
+
     tracker = pm.callbacks.Tracker(
-        ints=lambda *t: t[-1],
-        ints2=lambda ap, h, j: j,
-        time=time.time,
+        ints=lambda *t: t[-1], ints2=lambda ap, h, j: j, time=time.time
     )
     for i in range(10):
         tracker(None, None, i)
-    assert 'time' in tracker.hist
-    assert 'ints' in tracker.hist
-    assert 'ints2' in tracker.hist
-    assert (len(tracker['ints'])
-            == len(tracker['ints2'])
-            == len(tracker['time'])
-            == 10)
-    assert tracker['ints'] == tracker['ints2'] == list(range(10))
-    tracker = pm.callbacks.Tracker(
-        bad=lambda t: t  # bad signature
-    )
+    assert "time" in tracker.hist
+    assert "ints" in tracker.hist
+    assert "ints2" in tracker.hist
+    assert len(tracker["ints"]) == len(tracker["ints2"]) == len(tracker["time"]) == 10
+    assert tracker["ints"] == tracker["ints2"] == list(range(10))
+    tracker = pm.callbacks.Tracker(bad=lambda t: t)  # bad signature
     with pytest.raises(TypeError):
         tracker(None, None, 1)
 
 
-@pytest.fixture('module')
+@pytest.fixture("module")
 def three_var_model():
     with pm.Model() as model:
-        pm.HalfNormal('one', shape=(10, 2), total_size=100)
-        pm.Normal('two', shape=(10, ))
-        pm.Normal('three', shape=(10, 1, 2))
+        pm.HalfNormal("one", shape=(10, 2), total_size=100)
+        pm.Normal("two", shape=(10,))
+        pm.Normal("three", shape=(10, 1, 2))
     return model
 
 
 @pytest.mark.parametrize(
-    ['raises', 'grouping'],
+    ["raises", "grouping"],
     [
         (not_raises(), {MeanFieldGroup: None}),
-        (not_raises(), {FullRankGroup: None, MeanFieldGroup: ['one']}),
-        (not_raises(), {MeanFieldGroup: ['one'], FullRankGroup: ['two'], NormalizingFlowGroup: ['three']}),
-        (pytest.raises(TypeError, match='Found duplicates'),
-            {MeanFieldGroup: ['one'], FullRankGroup: ['two', 'one'], NormalizingFlowGroup: ['three']}),
-        (pytest.raises(TypeError, match='No approximation is specified'), {MeanFieldGroup: ['one', 'two']}),
-        (not_raises(), {MeanFieldGroup: ['one'], FullRankGroup: ['two', 'three']}),
-    ]
+        (not_raises(), {FullRankGroup: None, MeanFieldGroup: ["one"]}),
+        (
+            not_raises(),
+            {
+                MeanFieldGroup: ["one"],
+                FullRankGroup: ["two"],
+                NormalizingFlowGroup: ["three"],
+            },
+        ),
+        (
+            pytest.raises(TypeError, match="Found duplicates"),
+            {
+                MeanFieldGroup: ["one"],
+                FullRankGroup: ["two", "one"],
+                NormalizingFlowGroup: ["three"],
+            },
+        ),
+        (
+            pytest.raises(TypeError, match="No approximation is specified"),
+            {MeanFieldGroup: ["one", "two"]},
+        ),
+        (not_raises(), {MeanFieldGroup: ["one"], FullRankGroup: ["two", "three"]}),
+    ],
 )
 def test_init_groups(three_var_model, raises, grouping):
     with raises, three_var_model:
         approxes, groups = zip(*grouping.items())
-        groups = [list(map(functools.partial(getattr, three_var_model), g))
-                  if g is not None else None
-                  for g in groups]
+        groups = [
+            list(map(functools.partial(getattr, three_var_model), g))
+            if g is not None
+            else None
+            for g in groups
+        ]
         inited_groups = [a(group=g) for a, g in zip(approxes, groups)]
         approx = Approximation(inited_groups)
         for ig, g in zip(inited_groups, groups):
@@ -117,24 +122,46 @@ def test_init_groups(three_var_model, raises, grouping):
             assert approx.ndim == three_var_model.ndim
 
 
-@pytest.fixture(params=[
+@pytest.fixture(
+    params=[
         ({}, {MeanFieldGroup: (None, {})}),
-        ({}, {FullRankGroup: (None, {}), MeanFieldGroup: (['one'], {})}),
-        ({}, {MeanFieldGroup: (['one'], {}), FullRankGroup: (['two'], {}),
-              NormalizingFlowGroup: (['three'], {'flow': 'scale-hh*2-planar-radial-loc'})}),
-        ({}, {MeanFieldGroup: (['one'], {}), FullRankGroup: (['two', 'three'], {})}),
-        ({}, {MeanFieldGroup: (['one'], {}), EmpiricalGroup: (['two', 'three'], {'size': 100})})
-],
-    ids=lambda t: ', '.join('%s: %s' % (k.__name__, v[0]) for k, v in t[1].items())
+        ({}, {FullRankGroup: (None, {}), MeanFieldGroup: (["one"], {})}),
+        (
+            {},
+            {
+                MeanFieldGroup: (["one"], {}),
+                FullRankGroup: (["two"], {}),
+                NormalizingFlowGroup: (
+                    ["three"],
+                    {"flow": "scale-hh*2-planar-radial-loc"},
+                ),
+            },
+        ),
+        ({}, {MeanFieldGroup: (["one"], {}), FullRankGroup: (["two", "three"], {})}),
+        (
+            {},
+            {
+                MeanFieldGroup: (["one"], {}),
+                EmpiricalGroup: (["two", "three"], {"size": 100}),
+            },
+        ),
+    ],
+    ids=lambda t: ", ".join("%s: %s" % (k.__name__, v[0]) for k, v in t[1].items()),
 )
 def three_var_groups(request, three_var_model):
     kw, grouping = request.param
     approxes, groups = zip(*grouping.items())
     groups, gkwargs = zip(*groups)
-    groups = [list(map(functools.partial(getattr, three_var_model), g))
-              if g is not None else None
-              for g in groups]
-    inited_groups = [a(group=g, model=three_var_model, **gk) for a, g, gk in zip(approxes, groups, gkwargs)]
+    groups = [
+        list(map(functools.partial(getattr, three_var_model), g))
+        if g is not None
+        else None
+        for g in groups
+    ]
+    inited_groups = [
+        a(group=g, model=three_var_model, **gk)
+        for a, g, gk in zip(approxes, groups, gkwargs)
+    ]
     return inited_groups
 
 
@@ -151,30 +178,30 @@ def three_var_approx_single_group_mf(three_var_model):
 
 def test_sample_simple(three_var_approx):
     trace = three_var_approx.sample(500)
-    assert set(trace.varnames) == {'one', 'one_log__', 'three', 'two'}
+    assert set(trace.varnames) == {"one", "one_log__", "three", "two"}
     assert len(trace) == 500
-    assert trace[0]['one'].shape == (10, 2)
-    assert trace[0]['two'].shape == (10, )
-    assert trace[0]['three'].shape == (10, 1, 2)
+    assert trace[0]["one"].shape == (10, 2)
+    assert trace[0]["two"].shape == (10,)
+    assert trace[0]["three"].shape == (10, 1, 2)
 
 
 @pytest.fixture
 def aevb_initial():
-    return theano.shared(np.random.rand(3, 7).astype('float32'))
+    return theano.shared(np.random.rand(3, 7).astype("float32"))
 
 
 @pytest.fixture(
     params=[
         (MeanFieldGroup, {}),
         (FullRankGroup, {}),
-        (NormalizingFlowGroup, {'flow': 'scale'}),
-        (NormalizingFlowGroup, {'flow': 'loc'}),
-        (NormalizingFlowGroup, {'flow': 'hh'}),
-        (NormalizingFlowGroup, {'flow': 'planar'}),
-        (NormalizingFlowGroup, {'flow': 'radial'}),
-        (NormalizingFlowGroup, {'flow': 'radial-loc'})
+        (NormalizingFlowGroup, {"flow": "scale"}),
+        (NormalizingFlowGroup, {"flow": "loc"}),
+        (NormalizingFlowGroup, {"flow": "hh"}),
+        (NormalizingFlowGroup, {"flow": "planar"}),
+        (NormalizingFlowGroup, {"flow": "radial"}),
+        (NormalizingFlowGroup, {"flow": "radial-loc"}),
     ],
-    ids=lambda t: '{c} : {d}'.format(c=t[0].__name__, d=t[1])
+    ids=lambda t: "{c} : {d}".format(c=t[0].__name__, d=t[1]),
 )
 def parametric_grouped_approxes(request):
     return request.param
@@ -190,10 +217,14 @@ def three_var_aevb_groups(parametric_grouped_approxes, three_var_model, aevb_ini
         if isinstance(k, int):
             params[k] = dict()
             for k_i, v_i in v.items():
-                params[k][k_i] = aevb_initial.dot(np.random.rand(7, *v_i).astype('float32'))
+                params[k][k_i] = aevb_initial.dot(
+                    np.random.rand(7, *v_i).astype("float32")
+                )
         else:
-            params[k] = aevb_initial.dot(np.random.rand(7, *v).astype('float32'))
-    aevb_g = cls([three_var_model.one], params=params, model=three_var_model, local=True)
+            params[k] = aevb_initial.dot(np.random.rand(7, *v).astype("float32"))
+    aevb_g = cls(
+        [three_var_model.one], params=params, model=three_var_model, local=True
+    )
     return [aevb_g, MeanFieldGroup(None, model=three_var_model)]
 
 
@@ -204,35 +235,35 @@ def three_var_aevb_approx(three_var_model, three_var_aevb_groups):
 
 
 def test_sample_aevb(three_var_aevb_approx, aevb_initial):
-    pm.KLqp(three_var_aevb_approx).fit(1, more_replacements={
-        aevb_initial: np.zeros_like(aevb_initial.get_value())[:1]
-    })
-    aevb_initial.set_value(np.random.rand(7, 7).astype('float32'))
+    pm.KLqp(three_var_aevb_approx).fit(
+        1, more_replacements={aevb_initial: np.zeros_like(aevb_initial.get_value())[:1]}
+    )
+    aevb_initial.set_value(np.random.rand(7, 7).astype("float32"))
     trace = three_var_aevb_approx.sample(500)
-    assert set(trace.varnames) == {'one', 'one_log__', 'two', 'three'}
+    assert set(trace.varnames) == {"one", "one_log__", "two", "three"}
     assert len(trace) == 500
-    assert trace[0]['one'].shape == (7, 2)
-    assert trace[0]['two'].shape == (10, )
-    assert trace[0]['three'].shape == (10, 1, 2)
+    assert trace[0]["one"].shape == (7, 2)
+    assert trace[0]["two"].shape == (10,)
+    assert trace[0]["three"].shape == (10, 1, 2)
 
-    aevb_initial.set_value(np.random.rand(13, 7).astype('float32'))
+    aevb_initial.set_value(np.random.rand(13, 7).astype("float32"))
     trace = three_var_aevb_approx.sample(500)
-    assert set(trace.varnames) == {'one', 'one_log__', 'two', 'three'}
+    assert set(trace.varnames) == {"one", "one_log__", "two", "three"}
     assert len(trace) == 500
-    assert trace[0]['one'].shape == (13, 2)
-    assert trace[0]['two'].shape == (10,)
-    assert trace[0]['three'].shape == (10, 1, 2)
+    assert trace[0]["one"].shape == (13, 2)
+    assert trace[0]["two"].shape == (10,)
+    assert trace[0]["three"].shape == (10, 1, 2)
 
 
 def test_replacements_in_sample_node_aevb(three_var_aevb_approx, aevb_initial):
-    inp = tt.matrix(dtype='float32')
+    inp = tt.matrix(dtype="float32")
     three_var_aevb_approx.sample_node(
-        three_var_aevb_approx.model.one, 2,
-        more_replacements={aevb_initial: inp}).eval({inp: np.random.rand(7, 7).astype('float32')})
+        three_var_aevb_approx.model.one, 2, more_replacements={aevb_initial: inp}
+    ).eval({inp: np.random.rand(7, 7).astype("float32")})
 
     three_var_aevb_approx.sample_node(
-        three_var_aevb_approx.model.one, None,
-        more_replacements={aevb_initial: inp}).eval({inp: np.random.rand(7, 7).astype('float32')})
+        three_var_aevb_approx.model.one, None, more_replacements={aevb_initial: inp}
+    ).eval({inp: np.random.rand(7, 7).astype("float32")})
 
 
 def test_vae():
@@ -242,24 +273,28 @@ def test_vae():
     x_inp = tt.vector()
     x_inp.tag.test_value = data[:minibatch_size]
 
-    ae = theano.shared(pm.floatX([.1, .1]))
-    be = theano.shared(pm.floatX(1.))
+    ae = theano.shared(pm.floatX([0.1, 0.1]))
+    be = theano.shared(pm.floatX(1.0))
 
-    ad = theano.shared(pm.floatX(1.))
-    bd = theano.shared(pm.floatX(1.))
+    ad = theano.shared(pm.floatX(1.0))
+    bd = theano.shared(pm.floatX(1.0))
 
-    enc = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
-    mu,  rho = enc[:, 0], enc[:, 1]
+    enc = x_inp.dimshuffle(0, "x") * ae.dimshuffle("x", 0) + be
+    mu, rho = enc[:, 0], enc[:, 1]
 
     with pm.Model():
         # Hidden variables
-        zs = pm.Normal('zs', mu=0, sd=1, shape=minibatch_size)
+        zs = pm.Normal("zs", mu=0, sd=1, shape=minibatch_size)
         dec = zs * ad + bd
         # Observation model
-        pm.Normal('xs_', mu=dec, sd=0.1, observed=x_inp)
+        pm.Normal("xs_", mu=dec, sd=0.1, observed=x_inp)
 
-        pm.fit(1, local_rv={zs: dict(mu=mu, rho=rho)},
-               more_replacements={x_inp: x_mini}, more_obj_params=[ae, be, ad, bd])
+        pm.fit(
+            1,
+            local_rv={zs: dict(mu=mu, rho=rho)},
+            more_replacements={x_inp: x_mini},
+            more_obj_params=[ae, be, ad, bd],
+        )
 
 
 def test_logq_mini_1_sample_1_var(parametric_grouped_approxes, three_var_model):
@@ -272,7 +307,9 @@ def test_logq_mini_1_sample_1_var(parametric_grouped_approxes, three_var_model):
 
 def test_logq_mini_2_sample_2_var(parametric_grouped_approxes, three_var_model):
     cls, kw = parametric_grouped_approxes
-    approx = cls([three_var_model.one, three_var_model.two], model=three_var_model, **kw)
+    approx = cls(
+        [three_var_model.one, three_var_model.two], model=three_var_model, **kw
+    )
     logq = approx.logq
     logq = approx.set_size_and_deterministic(logq, 2, 0)
     logq.eval()
@@ -280,7 +317,9 @@ def test_logq_mini_2_sample_2_var(parametric_grouped_approxes, three_var_model):
 
 def test_logq_mini_sample_aevb(three_var_aevb_groups):
     approx = three_var_aevb_groups[0]
-    logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 3, 0)
+    logq, symbolic_logq = approx.set_size_and_deterministic(
+        [approx.logq, approx.symbolic_logq], 3, 0
+    )
     e = logq.eval()
     es = symbolic_logq.eval()
     assert e.shape == ()
@@ -289,13 +328,17 @@ def test_logq_mini_sample_aevb(three_var_aevb_groups):
 
 def test_logq_aevb(three_var_aevb_approx):
     approx = three_var_aevb_approx
-    logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 1, 0)
+    logq, symbolic_logq = approx.set_size_and_deterministic(
+        [approx.logq, approx.symbolic_logq], 1, 0
+    )
     e = logq.eval()
     es = symbolic_logq.eval()
     assert e.shape == ()
     assert es.shape == (1,)
 
-    logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 2, 0)
+    logq, symbolic_logq = approx.set_size_and_deterministic(
+        [approx.logq, approx.symbolic_logq], 2, 0
+    )
     e = logq.eval()
     es = symbolic_logq.eval()
     assert e.shape == ()
@@ -304,15 +347,19 @@ def test_logq_aevb(three_var_aevb_approx):
 
 def test_logq_globals(three_var_approx):
     if not three_var_approx.has_logq:
-        pytest.skip('%s does not implement logq' % three_var_approx)
+        pytest.skip("%s does not implement logq" % three_var_approx)
     approx = three_var_approx
-    logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 1, 0)
+    logq, symbolic_logq = approx.set_size_and_deterministic(
+        [approx.logq, approx.symbolic_logq], 1, 0
+    )
     e = logq.eval()
     es = symbolic_logq.eval()
     assert e.shape == ()
     assert es.shape == (1,)
 
-    logq, symbolic_logq = approx.set_size_and_deterministic([approx.logq, approx.symbolic_logq], 2, 0)
+    logq, symbolic_logq = approx.set_size_and_deterministic(
+        [approx.logq, approx.symbolic_logq], 2, 0
+    )
     e = logq.eval()
     es = symbolic_logq.eval()
     assert e.shape == ()
@@ -320,79 +367,118 @@ def test_logq_globals(three_var_approx):
 
 
 @pytest.mark.parametrize(
-    'raises, vfam, type_, kw',
+    "raises, vfam, type_, kw",
     [
-        (not_raises(), 'mean_field', MeanFieldGroup, {}),
-        (not_raises(), 'mf', MeanFieldGroup, {}),
-        (not_raises(), 'full_rank', FullRankGroup, {}),
-        (not_raises(), 'fr', FullRankGroup, {}),
-        (not_raises(), 'FR', FullRankGroup, {}),
-        (not_raises(), 'loc', NormalizingFlowGroup, {}),
-        (not_raises(), 'scale', NormalizingFlowGroup, {}),
-        (not_raises(), 'hh', NormalizingFlowGroup, {}),
-        (not_raises(), 'planar', NormalizingFlowGroup, {}),
-        (not_raises(), 'radial', NormalizingFlowGroup, {}),
-        (not_raises(), 'scale-loc', NormalizingFlowGroup, {}),
-        (pytest.raises(ValueError, match='Need `trace` or `size`'), 'empirical', EmpiricalGroup, {}),
-        (not_raises(), 'empirical', EmpiricalGroup, {'size': 100}),
-    ]
+        (not_raises(), "mean_field", MeanFieldGroup, {}),
+        (not_raises(), "mf", MeanFieldGroup, {}),
+        (not_raises(), "full_rank", FullRankGroup, {}),
+        (not_raises(), "fr", FullRankGroup, {}),
+        (not_raises(), "FR", FullRankGroup, {}),
+        (not_raises(), "loc", NormalizingFlowGroup, {}),
+        (not_raises(), "scale", NormalizingFlowGroup, {}),
+        (not_raises(), "hh", NormalizingFlowGroup, {}),
+        (not_raises(), "planar", NormalizingFlowGroup, {}),
+        (not_raises(), "radial", NormalizingFlowGroup, {}),
+        (not_raises(), "scale-loc", NormalizingFlowGroup, {}),
+        (
+            pytest.raises(ValueError, match="Need `trace` or `size`"),
+            "empirical",
+            EmpiricalGroup,
+            {},
+        ),
+        (not_raises(), "empirical", EmpiricalGroup, {"size": 100}),
+    ],
 )
 def test_group_api_vfam(three_var_model, raises, vfam, type_, kw):
     with three_var_model, raises:
         g = Group([three_var_model.one], vfam, **kw)
         assert isinstance(g, type_)
-        assert not hasattr(g, '_kwargs')
+        assert not hasattr(g, "_kwargs")
         if isinstance(g, NormalizingFlowGroup):
             assert isinstance(g.flow, pm.flows.AbstractFlow)
             assert g.flow.formula == vfam
 
 
 @pytest.mark.parametrize(
-    'raises, params, type_, kw, formula',
+    "raises, params, type_, kw, formula",
     [
-        (not_raises(),
-         dict(mu=np.ones((10, 2), 'float32'), rho=np.ones((10, 2), 'float32')),
-         MeanFieldGroup, {}, None),
-
-        (not_raises(),
-         dict(mu=np.ones((10, 2), 'float32'),
-              L_tril=np.ones(
-                  FullRankGroup.get_param_spec_for(d=np.prod((10, 2)))['L_tril'],
-                  'float32'
-              )),
-         FullRankGroup, {}, None),
-
-        (not_raises(),
-         {0: dict(loc=np.ones((10, 2), 'float32'))},
-         NormalizingFlowGroup, {}, 'loc'),
-
-        (not_raises(),
-         {0: dict(rho=np.ones((10, 2), 'float32'))},
-         NormalizingFlowGroup, {}, 'scale'),
-
-        (not_raises(),
-         {0: dict(v=np.ones((10, 2), 'float32'),)},
-         NormalizingFlowGroup, {}, 'hh'),
-
-        (not_raises(),
-         {0: dict(u=np.ones((10, 2), 'float32'),
-                  w=np.ones((10, 2), 'float32'),
-                  b=1.)},
-         NormalizingFlowGroup, {}, 'planar'),
-
-        (not_raises(),
-         {0: dict(z_ref=np.ones((10, 2), 'float32'),
-                  a=1.,
-                  b=1.)},
-         NormalizingFlowGroup, {}, 'radial'),
-
-        (not_raises(),
-         {0: dict(rho=np.ones((10, 2), 'float32')),
-          1: dict(loc=np.ones((10, 2), 'float32'))},
-         NormalizingFlowGroup, {}, 'scale-loc'),
-
-        (not_raises(), dict(histogram=np.ones((20, 10, 2), 'float32')), EmpiricalGroup, {}, None),
-    ]
+        (
+            not_raises(),
+            dict(mu=np.ones((10, 2), "float32"), rho=np.ones((10, 2), "float32")),
+            MeanFieldGroup,
+            {},
+            None,
+        ),
+        (
+            not_raises(),
+            dict(
+                mu=np.ones((10, 2), "float32"),
+                L_tril=np.ones(
+                    FullRankGroup.get_param_spec_for(d=np.prod((10, 2)))["L_tril"],
+                    "float32",
+                ),
+            ),
+            FullRankGroup,
+            {},
+            None,
+        ),
+        (
+            not_raises(),
+            {0: dict(loc=np.ones((10, 2), "float32"))},
+            NormalizingFlowGroup,
+            {},
+            "loc",
+        ),
+        (
+            not_raises(),
+            {0: dict(rho=np.ones((10, 2), "float32"))},
+            NormalizingFlowGroup,
+            {},
+            "scale",
+        ),
+        (
+            not_raises(),
+            {0: dict(v=np.ones((10, 2), "float32"))},
+            NormalizingFlowGroup,
+            {},
+            "hh",
+        ),
+        (
+            not_raises(),
+            {
+                0: dict(
+                    u=np.ones((10, 2), "float32"), w=np.ones((10, 2), "float32"), b=1.0
+                )
+            },
+            NormalizingFlowGroup,
+            {},
+            "planar",
+        ),
+        (
+            not_raises(),
+            {0: dict(z_ref=np.ones((10, 2), "float32"), a=1.0, b=1.0)},
+            NormalizingFlowGroup,
+            {},
+            "radial",
+        ),
+        (
+            not_raises(),
+            {
+                0: dict(rho=np.ones((10, 2), "float32")),
+                1: dict(loc=np.ones((10, 2), "float32")),
+            },
+            NormalizingFlowGroup,
+            {},
+            "scale-loc",
+        ),
+        (
+            not_raises(),
+            dict(histogram=np.ones((20, 10, 2), "float32")),
+            EmpiricalGroup,
+            {},
+            None,
+        ),
+    ],
 )
 def test_group_api_params(three_var_model, raises, params, type_, kw, formula):
     with three_var_model, raises:
@@ -408,15 +494,15 @@ def test_group_api_params(three_var_model, raises, params, type_, kw, formula):
 
 
 @pytest.mark.parametrize(
-    'gcls, approx, kw',
+    "gcls, approx, kw",
     [
         (MeanFieldGroup, MeanField, {}),
         (FullRankGroup, FullRank, {}),
-        (EmpiricalGroup, Empirical, {'size': 100}),
-        (NormalizingFlowGroup, NormalizingFlow, {'flow': 'loc'}),
-        (NormalizingFlowGroup, NormalizingFlow, {'flow': 'scale-loc-scale'}),
-        (NormalizingFlowGroup, NormalizingFlow, {})
-    ]
+        (EmpiricalGroup, Empirical, {"size": 100}),
+        (NormalizingFlowGroup, NormalizingFlow, {"flow": "loc"}),
+        (NormalizingFlowGroup, NormalizingFlow, {"flow": "scale-loc-scale"}),
+        (NormalizingFlowGroup, NormalizingFlow, {}),
+    ],
 )
 def test_single_group_shortcuts(three_var_model, approx, kw, gcls):
     with three_var_model:
@@ -425,7 +511,7 @@ def test_single_group_shortcuts(three_var_model, approx, kw, gcls):
     assert len(a.groups) == 1
     assert isinstance(a.groups[0], gcls)
     if isinstance(a, NormalizingFlow):
-        assert a.flow.formula == kw.get('flow', NormalizingFlowGroup.default_flow)
+        assert a.flow.formula == kw.get("flow", NormalizingFlowGroup.default_flow)
 
 
 def test_elbo():
@@ -437,37 +523,39 @@ def test_elbo():
     post_sd = np.array([1], dtype=theano.config.floatX)
     # Create a model for test
     with pm.Model() as model:
-        mu = pm.Normal('mu', mu=mu0, sd=sigma)
-        pm.Normal('y', mu=mu, sd=1, observed=y_obs)
+        mu = pm.Normal("mu", mu=mu0, sd=sigma)
+        pm.Normal("y", mu=mu, sd=1, observed=y_obs)
 
     # Create variational gradient tensor
     mean_field = MeanField(model=model)
-    with pm.theanof.change_flags(compute_test_value='off'):
+    with pm.theanof.change_flags(compute_test_value="off"):
         elbo = -pm.operators.KL(mean_field)()(10000)
 
-    mean_field.shared_params['mu'].set_value(post_mu)
-    mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))
+    mean_field.shared_params["mu"].set_value(post_mu)
+    mean_field.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1))
 
     f = theano.function([], elbo)
     elbo_mc = f()
 
     # Exact value
-    elbo_true = (-0.5 * (
-        3 + 3 * post_mu ** 2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu +
-        y_obs[0] ** 2 + y_obs[1] ** 2 + mu0 ** 2 + 3 * np.log(2 * np.pi)) +
-                 0.5 * (np.log(2 * np.pi) + 1))
+    elbo_true = -0.5 * (
+        3
+        + 3 * post_mu ** 2
+        - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu
+        + y_obs[0] ** 2
+        + y_obs[1] ** 2
+        + mu0 ** 2
+        + 3 * np.log(2 * np.pi)
+    ) + 0.5 * (np.log(2 * np.pi) + 1)
     np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
 
 
-@pytest.mark.parametrize(
-    'aux_total_size',
-    range(2, 10, 3)
-)
+@pytest.mark.parametrize("aux_total_size", range(2, 10, 3))
 def test_scale_cost_to_minibatch_works(aux_total_size):
     mu0 = 1.5
     sigma = 1.0
     y_obs = np.array([1.6, 1.4])
-    beta = len(y_obs)/float(aux_total_size)
+    beta = len(y_obs) / float(aux_total_size)
     post_mu = np.array([1.88], dtype=theano.config.floatX)
     post_sd = np.array([1], dtype=theano.config.floatX)
 
@@ -475,215 +563,204 @@ def test_scale_cost_to_minibatch_works(aux_total_size):
     # with pm.Model(theano_config=dict(floatX='float64')):
     # did not not work as expected
     # there were some numeric problems, so float64 is forced
-    with pm.theanof.change_flags(floatX='float64', warn_float64='ignore'):
+    with pm.theanof.change_flags(floatX="float64", warn_float64="ignore"):
         with pm.Model():
-            assert theano.config.floatX == 'float64'
-            assert theano.config.warn_float64 == 'ignore'
-            mu = pm.Normal('mu', mu=mu0, sd=sigma)
-            pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size)
+            assert theano.config.floatX == "float64"
+            assert theano.config.warn_float64 == "ignore"
+            mu = pm.Normal("mu", mu=mu0, sd=sigma)
+            pm.Normal("y", mu=mu, sd=1, observed=y_obs, total_size=aux_total_size)
             # Create variational gradient tensor
             mean_field_1 = MeanField()
             assert mean_field_1.scale_cost_to_minibatch
-            mean_field_1.shared_params['mu'].set_value(post_mu)
-            mean_field_1.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))
+            mean_field_1.shared_params["mu"].set_value(post_mu)
+            mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1))
 
-            with pm.theanof.change_flags(compute_test_value='off'):
+            with pm.theanof.change_flags(compute_test_value="off"):
                 elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000)
 
         with pm.Model():
-            mu = pm.Normal('mu', mu=mu0, sd=sigma)
-            pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size)
+            mu = pm.Normal("mu", mu=mu0, sd=sigma)
+            pm.Normal("y", mu=mu, sd=1, observed=y_obs, total_size=aux_total_size)
             # Create variational gradient tensor
             mean_field_2 = MeanField()
             assert mean_field_1.scale_cost_to_minibatch
             mean_field_2.scale_cost_to_minibatch = False
             assert not mean_field_2.scale_cost_to_minibatch
-            mean_field_2.shared_params['mu'].set_value(post_mu)
-            mean_field_2.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))
+            mean_field_2.shared_params["mu"].set_value(post_mu)
+            mean_field_2.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1))
 
-        with pm.theanof.change_flags(compute_test_value='off'):
+        with pm.theanof.change_flags(compute_test_value="off"):
             elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000)
 
-        np.testing.assert_allclose(elbo_via_total_size_unscaled.eval(),
-                                   elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta), rtol=0.02, atol=1e-1)
+        np.testing.assert_allclose(
+            elbo_via_total_size_unscaled.eval(),
+            elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta),
+            rtol=0.02,
+            atol=1e-1,
+        )
 
 
-@pytest.mark.parametrize(
-    'aux_total_size',
-    range(2, 10, 3)
-)
+@pytest.mark.parametrize("aux_total_size", range(2, 10, 3))
 def test_elbo_beta_kl(aux_total_size):
     mu0 = 1.5
     sigma = 1.0
     y_obs = np.array([1.6, 1.4])
-    beta = len(y_obs)/float(aux_total_size)
+    beta = len(y_obs) / float(aux_total_size)
     post_mu = np.array([1.88], dtype=theano.config.floatX)
     post_sd = np.array([1], dtype=theano.config.floatX)
-    with pm.theanof.change_flags(floatX='float64', warn_float64='ignore'):
+    with pm.theanof.change_flags(floatX="float64", warn_float64="ignore"):
         with pm.Model():
-            mu = pm.Normal('mu', mu=mu0, sd=sigma)
-            pm.Normal('y', mu=mu, sd=1, observed=y_obs, total_size=aux_total_size)
+            mu = pm.Normal("mu", mu=mu0, sd=sigma)
+            pm.Normal("y", mu=mu, sd=1, observed=y_obs, total_size=aux_total_size)
             # Create variational gradient tensor
             mean_field_1 = MeanField()
             mean_field_1.scale_cost_to_minibatch = True
-            mean_field_1.shared_params['mu'].set_value(post_mu)
-            mean_field_1.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))
+            mean_field_1.shared_params["mu"].set_value(post_mu)
+            mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1))
 
-            with pm.theanof.change_flags(compute_test_value='off'):
+            with pm.theanof.change_flags(compute_test_value="off"):
                 elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000)
 
         with pm.Model():
-            mu = pm.Normal('mu', mu=mu0, sd=sigma)
-            pm.Normal('y', mu=mu, sd=1, observed=y_obs)
+            mu = pm.Normal("mu", mu=mu0, sd=sigma)
+            pm.Normal("y", mu=mu, sd=1, observed=y_obs)
             # Create variational gradient tensor
             mean_field_3 = MeanField()
-            mean_field_3.shared_params['mu'].set_value(post_mu)
-            mean_field_3.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))
+            mean_field_3.shared_params["mu"].set_value(post_mu)
+            mean_field_3.shared_params["rho"].set_value(np.log(np.exp(post_sd) - 1))
 
-            with pm.theanof.change_flags(compute_test_value='off'):
+            with pm.theanof.change_flags(compute_test_value="off"):
                 elbo_via_beta_kl = -pm.operators.KL(mean_field_3, beta=beta)()(10000)
 
-        np.testing.assert_allclose(elbo_via_total_size_scaled.eval(), elbo_via_beta_kl.eval(), rtol=0, atol=1e-1)
+        np.testing.assert_allclose(
+            elbo_via_total_size_scaled.eval(),
+            elbo_via_beta_kl.eval(),
+            rtol=0,
+            atol=1e-1,
+        )
 
 
-@pytest.fixture(
-    'module',
-    params=[True, False],
-    ids=['mini', 'full']
-)
+@pytest.fixture("module", params=[True, False], ids=["mini", "full"])
 def use_minibatch(request):
     return request.param
 
 
-@pytest.fixture('module')
+@pytest.fixture("module")
 def simple_model_data(use_minibatch):
     n = 1000
-    sd0 = 2.
-    mu0 = 4.
-    sd = 3.
-    mu = -5.
+    sd0 = 2.0
+    mu0 = 4.0
+    sd = 3.0
+    mu = -5.0
 
     data = sd * np.random.randn(n) + mu
     d = n / sd ** 2 + 1 / sd0 ** 2
     mu_post = (n * np.mean(data) / sd ** 2 + mu0 / sd0 ** 2) / d
     if use_minibatch:
         data = pm.Minibatch(data)
-    return dict(
-        n=n,
-        data=data,
-        mu_post=mu_post,
-        d=d,
-        mu0=mu0,
-        sd0=sd0,
-        sd=sd,
-    )
+    return dict(n=n, data=data, mu_post=mu_post, d=d, mu0=mu0, sd0=sd0, sd=sd)
 
 
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
 def simple_model(simple_model_data):
     with pm.Model() as model:
         mu_ = pm.Normal(
-            'mu', mu=simple_model_data['mu0'],
-            sd=simple_model_data['sd0'], testval=0)
-        pm.Normal('x', mu=mu_, sd=simple_model_data['sd'],
-                  observed=simple_model_data['data'],
-                  total_size=simple_model_data['n'])
+            "mu", mu=simple_model_data["mu0"], sd=simple_model_data["sd0"], testval=0
+        )
+        pm.Normal(
+            "x",
+            mu=mu_,
+            sd=simple_model_data["sd"],
+            observed=simple_model_data["data"],
+            total_size=simple_model_data["n"],
+        )
     return model
 
 
-@pytest.fixture('module', params=[
-        dict(cls=NFVI, init=dict(flow='scale-loc')),
+@pytest.fixture(
+    "module",
+    params=[
+        dict(cls=NFVI, init=dict(flow="scale-loc")),
         dict(cls=ADVI, init=dict()),
         dict(cls=FullRankADVI, init=dict()),
         dict(cls=SVGD, init=dict(n_particles=500, jitter=1)),
-        dict(cls=ASVGD, init=dict(temperature=1.)),
-    ], ids=[
-        'NFVI=scale-loc',
-        'ADVI',
-        'FullRankADVI',
-        'SVGD',
-        'ASVGD'
-    ])
+        dict(cls=ASVGD, init=dict(temperature=1.0)),
+    ],
+    ids=["NFVI=scale-loc", "ADVI", "FullRankADVI", "SVGD", "ASVGD"],
+)
 def inference_spec(request):
-    cls = request.param['cls']
-    init = request.param['init']
+    cls = request.param["cls"]
+    init = request.param["init"]
 
     def init_(**kw):
         k = init.copy()
         k.update(kw)
         return cls(**k)
+
     init_.cls = cls
     return init_
 
 
-@pytest.fixture('function')
+@pytest.fixture("function")
 def inference(inference_spec, simple_model):
     with simple_model:
         return inference_spec()
 
 
-@pytest.fixture('function')
+@pytest.fixture("function")
 def fit_kwargs(inference, use_minibatch):
     _select = {
-        (ADVI, 'full'): dict(
-            obj_optimizer=pm.adagrad_window(learning_rate=0.02, n_win=50),
-            n=5000
+        (ADVI, "full"): dict(
+            obj_optimizer=pm.adagrad_window(learning_rate=0.02, n_win=50), n=5000
         ),
-        (ADVI, 'mini'): dict(
-            obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50),
-            n=12000
+        (ADVI, "mini"): dict(
+            obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), n=12000
         ),
-        (NFVI, 'full'): dict(
-            obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50),
-            n=12000
+        (NFVI, "full"): dict(
+            obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), n=12000
         ),
-        (NFVI, 'mini'): dict(
-            obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50),
-            n=12000
+        (NFVI, "mini"): dict(
+            obj_optimizer=pm.adagrad_window(learning_rate=0.01, n_win=50), n=12000
         ),
-        (FullRankADVI, 'full'): dict(
-            obj_optimizer=pm.adagrad_window(learning_rate=0.007, n_win=50),
-            n=6000
+        (FullRankADVI, "full"): dict(
+            obj_optimizer=pm.adagrad_window(learning_rate=0.007, n_win=50), n=6000
         ),
-        (FullRankADVI, 'mini'): dict(
-            obj_optimizer=pm.adagrad_window(learning_rate=0.007, n_win=50),
-            n=12000
+        (FullRankADVI, "mini"): dict(
+            obj_optimizer=pm.adagrad_window(learning_rate=0.007, n_win=50), n=12000
         ),
-        (SVGD, 'full'): dict(
-            obj_optimizer=pm.adagrad_window(learning_rate=0.075, n_win=7),
-            n=300
+        (SVGD, "full"): dict(
+            obj_optimizer=pm.adagrad_window(learning_rate=0.075, n_win=7), n=300
         ),
-        (SVGD, 'mini'): dict(
-            obj_optimizer=pm.adagrad_window(learning_rate=0.075, n_win=7),
-            n=300
+        (SVGD, "mini"): dict(
+            obj_optimizer=pm.adagrad_window(learning_rate=0.075, n_win=7), n=300
         ),
-        (ASVGD, 'full'): dict(
+        (ASVGD, "full"): dict(
             obj_optimizer=pm.adagrad_window(learning_rate=0.07, n_win=10),
-            n=500, obj_n_mc=300
+            n=500,
+            obj_n_mc=300,
         ),
-        (ASVGD, 'mini'): dict(
+        (ASVGD, "mini"): dict(
             obj_optimizer=pm.adagrad_window(learning_rate=0.07, n_win=10),
-            n=500, obj_n_mc=300
-        )
+            n=500,
+            obj_n_mc=300,
+        ),
     }
     if use_minibatch:
-        key = 'mini'
+        key = "mini"
         # backward compat for PR#3071
         inference.approx.scale_cost_to_minibatch = False
     else:
-        key = 'full'
+        key = "full"
     return _select[(type(inference), key)]
 
 
-@pytest.mark.run('first')
-def test_fit_oo(inference,
-                fit_kwargs,
-                simple_model_data):
+@pytest.mark.run("first")
+def test_fit_oo(inference, fit_kwargs, simple_model_data):
     trace = inference.fit(**fit_kwargs).sample(10000)
-    mu_post = simple_model_data['mu_post']
-    d = simple_model_data['d']
-    np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.05)
-    np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.1)
+    mu_post = simple_model_data["mu_post"]
+    d = simple_model_data["d"]
+    np.testing.assert_allclose(np.mean(trace["mu"]), mu_post, rtol=0.05)
+    np.testing.assert_allclose(np.std(trace["mu"]), np.sqrt(1.0 / d), rtol=0.1)
 
 
 def test_profile(inference):
@@ -692,20 +769,21 @@ def test_profile(inference):
 
 def test_remove_scan_op():
     with pm.Model():
-        pm.Normal('n', 0, 1)
+        pm.Normal("n", 0, 1)
         inference = ADVI()
         buff = six.StringIO()
         inference.run_profiling(n=10).summary(buff)
-        assert 'theano.scan_module.scan_op.Scan' not in buff.getvalue()
+        assert "theano.scan_module.scan_op.Scan" not in buff.getvalue()
         buff.close()
 
 
 def test_clear_cache():
     import pickle
+
     pymc3.memoize.clear_cache()
     assert all(len(c) == 0 for c in pymc3.memoize.CACHE_REGISTRY)
     with pm.Model():
-        pm.Normal('n', 0, 1)
+        pm.Normal("n", 0, 1)
         inference = ADVI()
         inference.fit(n=10)
         assert any(len(c) != 0 for c in inference.approx._cache.values())
@@ -713,7 +791,7 @@ def test_clear_cache():
         # should not be cleared at this call
         assert all(len(c) == 0 for c in inference.approx._cache.values())
         new_a = pickle.loads(pickle.dumps(inference.approx))
-        assert not hasattr(new_a, '_cache')
+        assert not hasattr(new_a, "_cache")
         inference_new = pm.KLqp(new_a)
         inference_new.fit(n=10)
         assert any(len(c) != 0 for c in inference_new.approx._cache.values())
@@ -721,46 +799,43 @@ def test_clear_cache():
         assert all(len(c) == 0 for c in inference_new.approx._cache.values())
 
 
-@pytest.fixture('module')
+@pytest.fixture("module")
 def another_simple_model():
     _model = models.simple_model()[1]
     with _model:
-        pm.Potential('pot', tt.ones((10, 10)))
+        pm.Potential("pot", tt.ones((10, 10)))
     return _model
 
 
-@pytest.fixture(params=[
-    dict(name='advi', kw=dict(start={})),
-    dict(name='fullrank_advi', kw=dict(start={})),
-    dict(name='svgd', kw=dict(start={}))],
-    ids=lambda d: d['name']
+@pytest.fixture(
+    params=[
+        dict(name="advi", kw=dict(start={})),
+        dict(name="fullrank_advi", kw=dict(start={})),
+        dict(name="svgd", kw=dict(start={})),
+    ],
+    ids=lambda d: d["name"],
 )
 def fit_method_with_object(request, another_simple_model):
-    _select = dict(
-        advi=ADVI,
-        fullrank_advi=FullRankADVI,
-        svgd=SVGD
-    )
+    _select = dict(advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD)
     with another_simple_model:
-        return _select[request.param['name']](
-            **request.param['kw'])
+        return _select[request.param["name"]](**request.param["kw"])
 
 
 @pytest.mark.parametrize(
-    ['method', 'kwargs', 'error'],
+    ["method", "kwargs", "error"],
     [
-        ('undefined', dict(), KeyError),
+        ("undefined", dict(), KeyError),
         (1, dict(), TypeError),
-        ('advi', dict(total_grad_norm_constraint=10), None),
-        ('fullrank_advi', dict(), None),
-        ('svgd', dict(total_grad_norm_constraint=10), None),
-        ('svgd', dict(start={}), None),
+        ("advi", dict(total_grad_norm_constraint=10), None),
+        ("fullrank_advi", dict(), None),
+        ("svgd", dict(total_grad_norm_constraint=10), None),
+        ("svgd", dict(start={}), None),
         # start argument is not allowed for ASVGD
-        ('asvgd', dict(start={}, total_grad_norm_constraint=10), TypeError),
-        ('asvgd', dict(total_grad_norm_constraint=10), None),
-        ('nfvi', dict(start={}), None),
-        ('nfvi=scale-loc', dict(start={}), None),
-        ('nfvi=bad-formula', dict(start={}), KeyError),
+        ("asvgd", dict(start={}, total_grad_norm_constraint=10), TypeError),
+        ("asvgd", dict(total_grad_norm_constraint=10), None),
+        ("nfvi", dict(start={}), None),
+        ("nfvi=scale-loc", dict(start={}), None),
+        ("nfvi=bad-formula", dict(start={}), KeyError),
     ],
 )
 def test_fit_fn_text(method, kwargs, error, another_simple_model):
@@ -772,40 +847,38 @@ def test_fit_fn_text(method, kwargs, error, another_simple_model):
             fit(10, method=method, **kwargs)
 
 
-@pytest.fixture('module')
+@pytest.fixture("module")
 def aevb_model():
     with pm.Model() as model:
-        pm.HalfNormal('x', shape=(2,), total_size=5)
-        pm.Normal('y', shape=(2,))
+        pm.HalfNormal("x", shape=(2,), total_size=5)
+        pm.Normal("y", shape=(2,))
     x = model.x
     y = model.y
     mu = theano.shared(x.init_value)
     rho = theano.shared(np.zeros_like(x.init_value))
-    return {
-        'model': model,
-        'y': y,
-        'x': x,
-        'replace': dict(mu=mu, rho=rho)
-    }
+    return {"model": model, "y": y, "x": x, "replace": dict(mu=mu, rho=rho)}
 
 
 def test_aevb(inference_spec, aevb_model):
     # add to inference that supports aevb
-    x = aevb_model['x']
-    y = aevb_model['y']
-    model = aevb_model['model']
-    replace = aevb_model['replace']
+    x = aevb_model["x"]
+    y = aevb_model["y"]
+    model = aevb_model["model"]
+    replace = aevb_model["replace"]
     with model:
         try:
-            inference = inference_spec(local_rv={x: {'mu': replace['mu']*5, 'rho': replace['rho']}})
-            approx = inference.fit(3, obj_n_mc=2, more_obj_params=list(replace.values()))
+            inference = inference_spec(
+                local_rv={x: {"mu": replace["mu"] * 5, "rho": replace["rho"]}}
+            )
+            approx = inference.fit(
+                3, obj_n_mc=2, more_obj_params=list(replace.values())
+            )
             approx.sample(10)
             approx.sample_node(
-                y,
-                more_replacements={x: np.asarray([1, 1], dtype=x.dtype)}
+                y, more_replacements={x: np.asarray([1, 1], dtype=x.dtype)}
             ).eval()
         except pm.opvi.AEVBInferenceError:
-            pytest.skip('Does not support AEVB')
+            pytest.skip("Does not support AEVB")
 
 
 def test_rowwise_approx(three_var_model, parametric_grouped_approxes):
@@ -813,19 +886,20 @@ def test_rowwise_approx(three_var_model, parametric_grouped_approxes):
     cls, kw = parametric_grouped_approxes
     with three_var_model:
         try:
-            approx = Approximation([cls([three_var_model.one], rowwise=True, **kw), Group(None, vfam='mf')])
+            approx = Approximation(
+                [cls([three_var_model.one], rowwise=True, **kw), Group(None, vfam="mf")]
+            )
             inference = pm.KLqp(approx)
             approx = inference.fit(3, obj_n_mc=2)
             approx.sample(10)
-            approx.sample_node(
-                three_var_model.one
-            ).eval()
+            approx.sample_node(three_var_model.one).eval()
         except pm.opvi.BatchedGroupError:
-            pytest.skip('Does not support rowwise grouping')
+            pytest.skip("Does not support rowwise grouping")
 
 
 def test_pickle_approx(three_var_approx):
     import pickle
+
     dump = pickle.dumps(three_var_approx)
     new = pickle.loads(dump)
     assert new.sample(1)
@@ -833,6 +907,7 @@ def test_pickle_approx(three_var_approx):
 
 def test_pickle_single_group(three_var_approx_single_group_mf):
     import pickle
+
     dump = pickle.dumps(three_var_approx_single_group_mf)
     new = pickle.loads(dump)
     assert new.sample(1)
@@ -840,28 +915,29 @@ def test_pickle_single_group(three_var_approx_single_group_mf):
 
 def test_pickle_approx_aevb(three_var_aevb_approx):
     import pickle
+
     dump = pickle.dumps(three_var_aevb_approx)
     new = pickle.loads(dump)
     assert new.sample(1000)
 
 
-@pytest.fixture('module')
+@pytest.fixture("module")
 def binomial_model():
     n_samples = 100
     xs = np.random.binomial(n=1, p=0.2, size=n_samples)
     with pm.Model() as model:
-        p = pm.Beta('p', alpha=1, beta=1)
-        pm.Binomial('xs', n=1, p=p, observed=xs)
+        p = pm.Beta("p", alpha=1, beta=1)
+        pm.Binomial("xs", n=1, p=p, observed=xs)
     return model
 
 
-@pytest.fixture('module')
+@pytest.fixture("module")
 def binomial_model_inference(binomial_model, inference_spec):
     with binomial_model:
         return inference_spec()
 
 
-@pytest.mark.run(after='test_sample_replacements')
+@pytest.mark.run(after="test_sample_replacements")
 def test_replacements(binomial_model_inference):
     d = tt.bscalar()
     d.tag.test_value = 1
@@ -869,32 +945,20 @@ def test_replacements(binomial_model_inference):
     p = approx.model.p
     p_t = p ** 3
     p_s = approx.sample_node(p_t)
-    if theano.config.compute_test_value != 'off':
+    if theano.config.compute_test_value != "off":
         assert p_s.tag.test_value.shape == p_t.tag.test_value.shape
     sampled = [p_s.eval() for _ in range(100)]
-    assert any(map(
-        operator.ne,
-        sampled[1:], sampled[:-1])
-    )  # stochastic
+    assert any(map(operator.ne, sampled[1:], sampled[:-1]))  # stochastic
 
     p_d = approx.sample_node(p_t, deterministic=True)
     sampled = [p_d.eval() for _ in range(100)]
-    assert all(map(
-        operator.eq,
-        sampled[1:], sampled[:-1])
-    )  # deterministic
+    assert all(map(operator.eq, sampled[1:], sampled[:-1]))  # deterministic
 
     p_r = approx.sample_node(p_t, deterministic=d)
     sampled = [p_r.eval({d: 1}) for _ in range(100)]
-    assert all(map(
-        operator.eq,
-        sampled[1:], sampled[:-1])
-    )  # deterministic
+    assert all(map(operator.eq, sampled[1:], sampled[:-1]))  # deterministic
     sampled = [p_r.eval({d: 0}) for _ in range(100)]
-    assert any(map(
-        operator.ne,
-        sampled[1:], sampled[:-1])
-    )  # stochastic
+    assert any(map(operator.ne, sampled[1:], sampled[:-1]))  # stochastic
 
 
 def test_sample_replacements(binomial_model_inference):
@@ -904,21 +968,15 @@ def test_sample_replacements(binomial_model_inference):
     p = approx.model.p
     p_t = p ** 3
     p_s = approx.sample_node(p_t, size=100)
-    if theano.config.compute_test_value != 'off':
-        assert p_s.tag.test_value.shape == (100, ) + p_t.tag.test_value.shape
+    if theano.config.compute_test_value != "off":
+        assert p_s.tag.test_value.shape == (100,) + p_t.tag.test_value.shape
     sampled = p_s.eval()
-    assert any(map(
-        operator.ne,
-        sampled[1:], sampled[:-1])
-    )  # stochastic
+    assert any(map(operator.ne, sampled[1:], sampled[:-1]))  # stochastic
     assert sampled.shape[0] == 100
 
     p_d = approx.sample_node(p_t, size=i)
     sampled = p_d.eval({i: 100})
-    assert any(map(
-        operator.ne,
-        sampled[1:], sampled[:-1])
-    )  # deterministic
+    assert any(map(operator.ne, sampled[1:], sampled[:-1]))  # stochastic
     assert sampled.shape[0] == 100
     sampled = p_d.eval({i: 101})
     assert sampled.shape[0] == 101
@@ -930,25 +988,27 @@ def test_discrete_not_allowed():
     y = np.random.normal(mu_true[z_true], np.ones_like(z_true))
 
     with pm.Model():
-        mu = pm.Normal('mu', mu=0, sd=10, shape=3)
-        z = pm.Categorical('z', p=tt.ones(3) / 3, shape=len(y))
-        pm.Normal('y_obs', mu=mu[z], sd=1., observed=y)
+        mu = pm.Normal("mu", mu=0, sd=10, shape=3)
+        z = pm.Categorical("z", p=tt.ones(3) / 3, shape=len(y))
+        pm.Normal("y_obs", mu=mu[z], sd=1.0, observed=y)
         with pytest.raises(opvi.ParametrizationError):
             pm.fit(n=1)  # fails
 
 
 def test_var_replacement():
     X_mean = pm.floatX(np.linspace(0, 10, 10))
-    y = pm.floatX(np.random.normal(X_mean*4, .05))
+    y = pm.floatX(np.random.normal(X_mean * 4, 0.05))
     with pm.Model():
-        inp = pm.Normal('X', X_mean, shape=X_mean.shape)
-        coef = pm.Normal('b', 4.)
+        inp = pm.Normal("X", X_mean, shape=X_mean.shape)
+        coef = pm.Normal("b", 4.0)
         mean = inp * coef
-        pm.Normal('y', mean, .1, observed=y)
+        pm.Normal("y", mean, 0.1, observed=y)
         advi = pm.fit(100)
-        assert advi.sample_node(mean).eval().shape == (10, )
+        assert advi.sample_node(mean).eval().shape == (10,)
         x_new = pm.floatX(np.linspace(0, 10, 11))
-        assert advi.sample_node(mean, more_replacements={inp: x_new}).eval().shape == (11, )
+        assert advi.sample_node(mean, more_replacements={inp: x_new}).eval().shape == (
+            11,
+        )
 
 
 def test_empirical_from_trace(another_simple_model):
@@ -964,30 +1024,31 @@ def test_empirical_from_trace(another_simple_model):
 
 @pytest.fixture(
     params=[
-        dict(cls=flows.PlanarFlow, init=dict(jitter=.1)),
-        dict(cls=flows.RadialFlow, init=dict(jitter=.1)),
-        dict(cls=flows.ScaleFlow, init=dict(jitter=.1)),
-        dict(cls=flows.LocFlow, init=dict(jitter=.1)),
-        dict(cls=flows.HouseholderFlow, init=dict(jitter=.1)),
+        dict(cls=flows.PlanarFlow, init=dict(jitter=0.1)),
+        dict(cls=flows.RadialFlow, init=dict(jitter=0.1)),
+        dict(cls=flows.ScaleFlow, init=dict(jitter=0.1)),
+        dict(cls=flows.LocFlow, init=dict(jitter=0.1)),
+        dict(cls=flows.HouseholderFlow, init=dict(jitter=0.1)),
     ],
-    ids=lambda d: d['cls'].__name__
+    ids=lambda d: d["cls"].__name__,
 )
 def flow_spec(request):
-    cls = request.param['cls']
-    init = request.param['init']
+    cls = request.param["cls"]
+    init = request.param["init"]
 
     def init_(**kw):
         k = init.copy()
         k.update(kw)
         return cls(**k)
+
     init_.cls = cls
     return init_
 
 
 def test_flow_det(flow_spec):
-    z0 = tt.arange(0, 20).astype('float32')
-    flow = flow_spec(dim=20, z0=z0.dimshuffle('x', 0))
-    with change_flags(compute_test_value='off'):
+    z0 = tt.arange(0, 20).astype("float32")
+    flow = flow_spec(dim=20, z0=z0.dimshuffle("x", 0))
+    with change_flags(compute_test_value="off"):
         z1 = flow.forward.flatten()
         J = tt.jacobian(z1, z0)
         logJdet = tt.log(tt.abs_(tt.nlinalg.det(J)))
@@ -996,14 +1057,14 @@ def test_flow_det(flow_spec):
 
 
 def test_flow_det_local(flow_spec):
-    z0 = tt.arange(0, 12).astype('float32')
+    z0 = tt.arange(0, 12).astype("float32")
     spec = flow_spec.cls.get_param_spec_for(d=12)
     params = dict()
     for k, shp in spec.items():
-        params[k] = np.random.randn(1, *shp).astype('float32')
+        params[k] = np.random.randn(1, *shp).astype("float32")
     flow = flow_spec(dim=12, z0=z0.reshape((1, 1, 12)), **params)
     assert flow.batched
-    with change_flags(compute_test_value='off'):
+    with change_flags(compute_test_value="off"):
         z1 = flow.forward.flatten()
         J = tt.jacobian(z1, z0)
         logJdet = tt.log(tt.abs_(tt.nlinalg.det(J)))
@@ -1017,18 +1078,20 @@ def test_flows_collect_chain():
     flow2 = flows.PlanarFlow(dim=2, z0=flow1)
     assert len(flow2.params) == 3
     assert len(flow2.all_params) == 6
-    np.testing.assert_allclose(flow1.logdet.eval() + flow2.logdet.eval(), flow2.sum_logdets.eval())
+    np.testing.assert_allclose(
+        flow1.logdet.eval() + flow2.logdet.eval(), flow2.sum_logdets.eval()
+    )
 
 
 @pytest.mark.parametrize(
-    'formula,length,order',
+    "formula,length,order",
     [
-        ('planar', 1, [flows.PlanarFlow]),
-        ('planar*2', 2, [flows.PlanarFlow] * 2),
-        ('planar-planar', 2, [flows.PlanarFlow] * 2),
-        ('planar-planar*2', 3, [flows.PlanarFlow] * 3),
-        ('hh-planar*2', 3, [flows.HouseholderFlow]+[flows.PlanarFlow] * 2)
-    ]
+        ("planar", 1, [flows.PlanarFlow]),
+        ("planar*2", 2, [flows.PlanarFlow] * 2),
+        ("planar-planar", 2, [flows.PlanarFlow] * 2),
+        ("planar-planar*2", 3, [flows.PlanarFlow] * 3),
+        ("hh-planar*2", 3, [flows.HouseholderFlow] + [flows.PlanarFlow] * 2),
+    ],
 )
 def test_flow_formula(formula, length, order):
     spec = flows.Formula(formula)
diff --git a/pymc3/theanof.py b/pymc3/theanof.py
index 2ea40632d0..66dbadd6ef 100644
--- a/pymc3/theanof.py
+++ b/pymc3/theanof.py
@@ -10,20 +10,22 @@
 from .data import GeneratorAdapter
 from .vartypes import typefilter, continuous_types
 
-__all__ = ['gradient',
-           'hessian',
-           'hessian_diag',
-           'inputvars',
-           'cont_inputs',
-           'floatX',
-           'smartfloatX',
-           'jacobian',
-           'CallableTensor',
-           'join_nonshared_inputs',
-           'make_shared_replacements',
-           'generator',
-           'set_tt_rng',
-           'tt_rng']
+__all__ = [
+    "gradient",
+    "hessian",
+    "hessian_diag",
+    "inputvars",
+    "cont_inputs",
+    "floatX",
+    "smartfloatX",
+    "jacobian",
+    "CallableTensor",
+    "join_nonshared_inputs",
+    "make_shared_replacements",
+    "generator",
+    "set_tt_rng",
+    "tt_rng",
+]
 
 
 def inputvars(a):
@@ -71,10 +73,11 @@ def smartfloatX(x):
     """
     Convert non int types to floatX 
     """
-    if str(x.dtype).startswith('float'):
+    if str(x.dtype).startswith("float"):
         x = floatX(x)
     return x
 
+
 """
 Theano derivative functions
 """
@@ -82,10 +85,10 @@ def smartfloatX(x):
 
 def gradient1(f, v):
     """flat gradient of f wrt v"""
-    return tt.flatten(tt.grad(f, v, disconnected_inputs='warn'))
+    return tt.flatten(tt.grad(f, v, disconnected_inputs="warn"))
 
 
-empty_gradient = tt.zeros(0, dtype='float32')
+empty_gradient = tt.zeros(0, dtype="float32")
 
 
 def gradient(f, vars=None):
@@ -101,7 +104,7 @@ def gradient(f, vars=None):
 def jacobian1(f, v):
     """jacobian of f wrt v"""
     f = tt.flatten(f)
-    idx = tt.arange(f.shape[0], dtype='int32')
+    idx = tt.arange(f.shape[0], dtype="int32")
 
     def grad_i(i):
         return gradient1(f[i], v)
@@ -120,25 +123,25 @@ def jacobian(f, vars=None):
 
 
 def jacobian_diag(f, x):
-    idx = tt.arange(f.shape[0], dtype='int32')
+    idx = tt.arange(f.shape[0], dtype="int32")
 
     def grad_ii(i):
         return theano.grad(f[i], x)[i]
 
-    return theano.scan(grad_ii, sequences=[idx],
-                       n_steps=f.shape[0],
-                       name='jacobian_diag')[0]
+    return theano.scan(
+        grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag"
+    )[0]
 
 
-@change_flags(compute_test_value='ignore')
+@change_flags(compute_test_value="ignore")
 def hessian(f, vars=None):
     return -jacobian(gradient(f, vars), vars)
 
 
-@change_flags(compute_test_value='ignore')
+@change_flags(compute_test_value="ignore")
 def hessian_diag1(f, v):
     g = gradient1(f, v)
-    idx = tt.arange(g.shape[0], dtype='int32')
+    idx = tt.arange(g.shape[0], dtype="int32")
 
     def hess_ii(i):
         return gradient1(g[i], v)[i]
@@ -146,7 +149,7 @@ def hess_ii(i):
     return theano.map(hess_ii, idx)[0]
 
 
-@change_flags(compute_test_value='ignore')
+@change_flags(compute_test_value="ignore")
 def hessian_diag(f, vars=None):
     if vars is None:
         vars = cont_inputs(f)
@@ -165,7 +168,6 @@ def makeiter(a):
 
 
 class IdentityOp(scalar.UnaryScalarOp):
-
     @staticmethod
     def st_impl(x):
         return x
@@ -203,7 +205,10 @@ def make_shared_replacements(vars, model):
     Dict of variable -> new shared variable
     """
     othervars = set(model.vars) - set(vars)
-    return {var: theano.shared(var.tag.test_value, var.name + '_shared') for var in othervars}
+    return {
+        var: theano.shared(var.tag.test_value, var.name + "_shared")
+        for var in othervars
+    }
 
 
 def join_nonshared_inputs(xs, vars, shared, make_shared=False):
@@ -222,15 +227,15 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False):
     inarray : vector of inputs
     """
     if not vars:
-        raise ValueError('Empty list of variables.')
+        raise ValueError("Empty list of variables.")
 
     joined = tt.concatenate([var.ravel() for var in vars])
 
     if not make_shared:
         tensor_type = joined.type
-        inarray = tensor_type('inarray')
+        inarray = tensor_type("inarray")
     else:
-        inarray = theano.shared(joined.tag.test_value, 'inarray')
+        inarray = theano.shared(joined.tag.test_value, "inarray")
 
     ordering = ArrayOrdering(vars)
     inarray.tag.test_value = joined.tag.test_value
@@ -238,7 +243,8 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False):
     get_var = {var.name: var for var in vars}
     replace = {
         get_var[var]: reshape_t(inarray[slc], shp).astype(dtyp)
-        for var, slc, shp, dtyp in ordering.vmap}
+        for var, slc, shp, dtyp in ordering.vmap
+    }
 
     replace.update(shared)
 
@@ -273,8 +279,8 @@ def __call__(self, input):
         return theano.clone(self.tensor, {oldinput: input}, strict=False)
 
 
-scalar_identity = IdentityOp(scalar.upgrade_to_float, name='scalar_identity')
-identity = tt.Elemwise(scalar_identity, name='identity')
+scalar_identity = IdentityOp(scalar.upgrade_to_float, name="scalar_identity")
+identity = tt.Elemwise(scalar_identity, name="identity")
 
 
 class GeneratorOp(Op):
@@ -295,7 +301,8 @@ class GeneratorOp(Op):
         and yields np.arrays with same types
     default : np.array with the same type as generator produces
     """
-    __props__ = ('generator',)
+
+    __props__ = ("generator",)
 
     def __init__(self, gen, default=None):
         super(GeneratorOp, self).__init__()
@@ -317,13 +324,13 @@ def perform(self, node, inputs, output_storage, params=None):
     def do_constant_folding(self, node):
         return False
 
-    __call__ = change_flags(compute_test_value='off')(Op.__call__)
+    __call__ = change_flags(compute_test_value="off")(Op.__call__)
 
     def set_gen(self, gen):
         if not isinstance(gen, GeneratorAdapter):
             gen = GeneratorAdapter(gen)
         if not gen.tensortype == self.generator.tensortype:
-            raise ValueError('New generator should yield the same type')
+            raise ValueError("New generator should yield the same type")
         self.generator = gen
 
     def set_default(self, value):
@@ -334,8 +341,9 @@ def set_default(self, value):
             t1 = (False,) * value.ndim
             t2 = self.generator.tensortype.broadcastable
             if not t1 == t2:
-                raise ValueError('Default value should have the '
-                                 'same type as generator')
+                raise ValueError(
+                    "Default value should have the " "same type as generator"
+                )
             self.default = value
 
 
@@ -450,13 +458,14 @@ def ix_(*args):
         new = tt.as_tensor(new)
         if new.ndim != 1:
             raise ValueError("Cross index must be 1 dimensional")
-        new = new.reshape((1,)*k + (new.size,) + (1,)*(nd-k-1))
+        new = new.reshape((1,) * k + (new.size,) + (1,) * (nd - k - 1))
         out.append(new)
     return tuple(out)
 
 
 def largest_common_dtype(tensors):
-    dtypes = set(str(t.dtype) if hasattr(t, 'dtype')
-                 else smartfloatX(np.asarray(t)).dtype
-                 for t in tensors)
+    dtypes = set(
+        str(t.dtype) if hasattr(t, "dtype") else smartfloatX(np.asarray(t)).dtype
+        for t in tensors
+    )
     return np.stack([np.ones((), dtype=dtype) for dtype in dtypes]).dtype
diff --git a/pymc3/tuning/scaling.py b/pymc3/tuning/scaling.py
index ffde751417..bf9d2dede2 100644
--- a/pymc3/tuning/scaling.py
+++ b/pymc3/tuning/scaling.py
@@ -1,16 +1,16 @@
-'''
+"""
 Created on Mar 12, 2011
 
 from __future__ import division
 @author: johnsalvatier
-'''
+"""
 import numpy as np
 from numpy import exp, log, sqrt
 from ..model import modelcontext, Point
 from ..theanof import hessian_diag, inputvars
 from ..blocking import DictToArrayBijection, ArrayOrdering
 
-__all__ = ['approx_hessian', 'find_hessian', 'trace_cov', 'guess_scaling']
+__all__ = ["approx_hessian", "find_hessian", "trace_cov", "guess_scaling"]
 
 
 def approx_hessian(point, vars=None, model=None):
@@ -39,11 +39,11 @@ def approx_hessian(point, vars=None, model=None):
     def grad_logp(point):
         return np.nan_to_num(dlogp(point))
 
-    '''
+    """
     Find the jacobian of the gradient function at the current position
     this should be the Hessian; invert it to find the approximate
     covariance matrix.
-    '''
+    """
     return -Jacobian(grad_logp)(bij.map(point))
 
 
@@ -124,8 +124,8 @@ def adjust_scaling(s, scaling_bound):
 def adjust_precision(tau, scaling_bound=1e-8):
     mag = sqrt(abs(tau))
 
-    bounded = bound(log(mag), log(scaling_bound), log(1./scaling_bound))
-    return exp(bounded)**2
+    bounded = bound(log(mag), log(scaling_bound), log(1.0 / scaling_bound))
+    return exp(bounded) ** 2
 
 
 def bound(a, l, u):
diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py
index 491c38b850..754bd45892 100644
--- a/pymc3/tuning/starting.py
+++ b/pymc3/tuning/starting.py
@@ -1,8 +1,8 @@
-'''
+"""
 Created on Mar 12, 2011
 
 @author: johnsalvatier
-'''
+"""
 from scipy.optimize import minimize
 import numpy as np
 from numpy import isfinite, nan_to_num
@@ -18,12 +18,21 @@
 import warnings
 from inspect import getargspec
 
-__all__ = ['find_MAP']
-
-
-def find_MAP(start=None, vars=None, method="L-BFGS-B",
-             return_raw=False, include_transformed=True, progressbar=True, maxeval=5000, model=None,
-             *args, **kwargs):
+__all__ = ["find_MAP"]
+
+
+def find_MAP(
+    start=None,
+    vars=None,
+    method="L-BFGS-B",
+    return_raw=False,
+    include_transformed=True,
+    progressbar=True,
+    maxeval=5000,
+    model=None,
+    *args,
+    **kwargs
+):
     """
     Finds the local maximum a posteriori point given a model.
 
@@ -58,7 +67,9 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B",
     wrapped it inside pymc3.sample() and you should thus avoid this method.
     """
 
-    warnings.warn('find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.')
+    warnings.warn(
+        "find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way."
+    )
 
     model = modelcontext(model)
     if start is None:
@@ -67,10 +78,12 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B",
         update_start_vals(start, model.test_point, model)
 
     if not set(start.keys()).issubset(model.named_vars.keys()):
-        extra_keys = ', '.join(set(start.keys()) - set(model.named_vars.keys()))
-        valid_keys = ', '.join(model.named_vars.keys())
-        raise KeyError('Some start parameters do not appear in the model!\n'
-                       'Valid keys are: {}, but {} was supplied'.format(valid_keys, extra_keys))
+        extra_keys = ", ".join(set(start.keys()) - set(model.named_vars.keys()))
+        valid_keys = ", ".join(model.named_vars.keys())
+        raise KeyError(
+            "Some start parameters do not appear in the model!\n"
+            "Valid keys are: {}, but {} was supplied".format(valid_keys, extra_keys)
+        )
 
     if vars is None:
         vars = model.cont_vars
@@ -90,29 +103,37 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B",
         compute_gradient = False
 
     if disc_vars or not compute_gradient:
-        pm._log.warning("Warning: gradient not available." +
-                        "(E.g. vars contains discrete variables). MAP " +
-                        "estimates may not be accurate for the default " +
-                        "parameters. Defaulting to non-gradient minimization " +
-                        "'Powell'.")
+        pm._log.warning(
+            "Warning: gradient not available."
+            + "(E.g. vars contains discrete variables). MAP "
+            + "estimates may not be accurate for the default "
+            + "parameters. Defaulting to non-gradient minimization "
+            + "'Powell'."
+        )
         method = "Powell"
 
     if "fmin" in kwargs:
         fmin = kwargs.pop("fmin")
-        warnings.warn('In future versions, set the optimization algorithm with a string. '
-                      'For example, use `method="L-BFGS-B"` instead of '
-                      '`fmin=sp.optimize.fmin_l_bfgs_b"`.')
+        warnings.warn(
+            "In future versions, set the optimization algorithm with a string. "
+            'For example, use `method="L-BFGS-B"` instead of '
+            '`fmin=sp.optimize.fmin_l_bfgs_b"`.'
+        )
 
         cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)
 
         # Check to see if minimization function actually uses the gradient
-        if 'fprime' in getargspec(fmin).args:
+        if "fprime" in getargspec(fmin).args:
+
             def grad_logp(point):
                 return nan_to_num(-dlogp_func(point))
-            opt_result = fmin(cost_func, bij.map(start), fprime=grad_logp, *args, **kwargs)
+
+            opt_result = fmin(
+                cost_func, bij.map(start), fprime=grad_logp, *args, **kwargs
+            )
         else:
             # Check to see if minimization function uses a starting value
-            if 'x0' in getargspec(fmin).args:
+            if "x0" in getargspec(fmin).args:
                 opt_result = fmin(cost_func, bij.map(start), *args, **kwargs)
             else:
                 opt_result = fmin(cost_func, *args, **kwargs)
@@ -129,7 +150,9 @@ def grad_logp(point):
             cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)
 
         try:
-            opt_result = minimize(cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs)
+            opt_result = minimize(
+                cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs
+            )
             mx0 = opt_result["x"]  # r -> opt_result
             cost_func.progress.total = cost_func.progress.n + 1
             cost_func.progress.update()
@@ -142,7 +165,9 @@ def grad_logp(point):
             cost_func.progress.close()
 
     vars = get_default_varnames(model.unobserved_RVs, include_transformed)
-    mx = {var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))}
+    mx = {
+        var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))
+    }
 
     if return_raw:
         return mx, opt_result
@@ -171,11 +196,11 @@ def __init__(self, maxeval=5000, progressbar=True, logp_func=None, dlogp_func=No
         self.logp_func = logp_func
         if dlogp_func is None:
             self.use_gradient = False
-            self.desc = 'logp = {:,.5g}'
+            self.desc = "logp = {:,.5g}"
         else:
             self.dlogp_func = dlogp_func
             self.use_gradient = True
-            self.desc = 'logp = {:,.5g}, ||grad|| = {:,.5g}'
+            self.desc = "logp = {:,.5g}, ||grad|| = {:,.5g}"
         self.previous_x = None
         self.progress = tqdm(total=maxeval, disable=not progressbar)
         self.progress.n = 0
@@ -187,7 +212,7 @@ def __call__(self, x):
             neg_grad = self.dlogp_func(pm.floatX(x))
             if np.all(np.isfinite(neg_grad)):
                 self.previous_x = x
-            grad = nan_to_num(-1.0*neg_grad)
+            grad = nan_to_num(-1.0 * neg_grad)
             grad = grad.astype(np.float64)
         else:
             self.previous_x = x
diff --git a/pymc3/util.py b/pymc3/util.py
index 080a2121ed..934e4af82f 100644
--- a/pymc3/util.py
+++ b/pymc3/util.py
@@ -2,7 +2,7 @@
 import functools
 from numpy import asscalar
 
-LATEX_ESCAPE_RE = re.compile(r'(%|_|\$|#|&)', re.MULTILINE)
+LATEX_ESCAPE_RE = re.compile(r"(%|_|\$|#|&)", re.MULTILINE)
 
 
 def escape_latex(strng):
@@ -25,8 +25,8 @@ def escape_latex(strng):
         A string with LaTeX escaped
     """
     if strng is None:
-        return u'None'
-    return LATEX_ESCAPE_RE.sub(r'\\\1', strng)
+        return u"None"
+    return LATEX_ESCAPE_RE.sub(r"\\\1", strng)
 
 
 def get_transformed_name(name, transform):
@@ -62,7 +62,7 @@ def is_transformed_name(name):
     bool
         Boolean, whether the string could have been produced by `get_transormed_name`
     """
-    return name.endswith('__') and name.count('_') >= 3
+    return name.endswith("__") and name.count("_") >= 3
 
 
 def get_untransformed_name(name):
@@ -80,9 +80,8 @@ def get_untransformed_name(name):
         String with untransformed version of the name.
     """
     if not is_transformed_name(name):
-        raise ValueError(
-            u'{} does not appear to be a transformed name'.format(name))
-    return '_'.join(name.split('_')[:-3])
+        raise ValueError(u"{} does not appear to be a transformed name".format(name))
+    return "_".join(name.split("_")[:-3])
 
 
 def get_default_varnames(var_iterator, include_transformed):
@@ -112,19 +111,20 @@ def get_variable_name(variable):
     """
     name = variable.name
     if name is None:
-        if hasattr(variable, 'get_parents'):
+        if hasattr(variable, "get_parents"):
             try:
-                names = [get_variable_name(item)
-                         for item in variable.get_parents()[0].inputs]
+                names = [
+                    get_variable_name(item) for item in variable.get_parents()[0].inputs
+                ]
                 # do not escape_latex these, since it is not idempotent
-                return 'f(%s)' % ',~'.join([n for n in names if isinstance(n, str)])
+                return "f(%s)" % ",~".join([n for n in names if isinstance(n, str)])
             except IndexError:
                 pass
         value = variable.eval()
         if not value.shape:
             return asscalar(value)
-        return 'array'
-    return r'\text{%s}' % name
+        return "array"
+    return r"\text{%s}" % name
 
 
 def update_start_vals(a, b, model):
@@ -137,16 +137,16 @@ def update_start_vals(a, b, model):
             for name in a:
                 if is_transformed_name(tname) and get_untransformed_name(tname) == name:
                     transform_func = [
-                        d.transformation for d in model.deterministics if d.name == name]
+                        d.transformation for d in model.deterministics if d.name == name
+                    ]
                     if transform_func:
-                        b[tname] = transform_func[0].forward_val(
-                            a[name], point=b)
+                        b[tname] = transform_func[0].forward_val(a[name], point=b)
 
     a.update({k: v for k, v in b.items() if k not in a})
 
 
 def get_transformed(z):
-    if hasattr(z, 'transformed'):
+    if hasattr(z, "transformed"):
         z = z.transformed
     return z
 
@@ -165,4 +165,5 @@ def enhanced(*args, **kwargs):
         else:
             newwrapper = functools.partial(wrapper, *args, **kwargs)
             return newwrapper
+
     return enhanced
diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py
index 72c04cccc5..9ab68fe9eb 100644
--- a/pymc3/variational/__init__.py
+++ b/pymc3/variational/__init__.py
@@ -13,7 +13,7 @@
     adam,
     adamax,
     norm_constraint,
-    total_norm_constraint
+    total_norm_constraint,
 )
 
 from . import inference
@@ -26,7 +26,7 @@
     Inference,
     KLqp,
     ImplicitGradient,
-    fit
+    fit,
 )
 
 from . import approximations
@@ -35,13 +35,10 @@
     FullRank,
     Empirical,
     NormalizingFlow,
-    sample_approx
+    sample_approx,
 )
 from . import opvi
-from .opvi import (
-    Group,
-    Approximation
-)
+from .opvi import Group, Approximation
 
 # special
 from .stein import Stein
diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py
index 30779211ed..1c4cccb058 100644
--- a/pymc3/variational/approximations.py
+++ b/pymc3/variational/approximations.py
@@ -12,37 +12,31 @@
 from pymc3.variational import flows
 
 
-__all__ = [
-    'MeanField',
-    'FullRank',
-    'Empirical',
-    'NormalizingFlow',
-    'sample_approx'
-]
+__all__ = ["MeanField", "FullRank", "Empirical", "NormalizingFlow", "sample_approx"]
 
 
 @Group.register
 class MeanFieldGroup(Group):
-    R"""Mean Field approximation to the posterior where spherical Gaussian family
+    r"""Mean Field approximation to the posterior where spherical Gaussian family
     is fitted to minimize KL divergence from True posterior. It is assumed
     that latent space variables are uncorrelated that is the main drawback
     of the method
     """
-    __param_spec__ = dict(mu=('d', ), rho=('d', ))
-    short_name = 'mean_field'
-    alias_names = frozenset(['mf'])
+    __param_spec__ = dict(mu=("d",), rho=("d",))
+    short_name = "mean_field"
+    alias_names = frozenset(["mf"])
 
     @node_property
     def mean(self):
-        return self.params_dict['mu']
+        return self.params_dict["mu"]
 
     @node_property
     def rho(self):
-        return self.params_dict['rho']
+        return self.params_dict["rho"]
 
     @node_property
     def cov(self):
-        var = rho2sd(self.rho)**2
+        var = rho2sd(self.rho) ** 2
         if self.batched:
             return batched_diag(var)
         else:
@@ -52,12 +46,12 @@ def cov(self):
     def std(self):
         return rho2sd(self.rho)
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __init_group__(self, group):
         super(MeanFieldGroup, self).__init_group__(group)
         if not self._check_user_params():
             self.shared_params = self.create_shared_params(
-                self._kwargs.get('start', None)
+                self._kwargs.get("start", None)
             )
         self._finalize_init()
 
@@ -76,10 +70,10 @@ def create_shared_params(self, start=None):
         if self.batched:
             start = np.tile(start, (self.bdim, 1))
             rho = np.tile(rho, (self.bdim, 1))
-        return {'mu': theano.shared(
-                    pm.floatX(start), 'mu'),
-                'rho': theano.shared(
-                    pm.floatX(rho), 'rho')}
+        return {
+            "mu": theano.shared(pm.floatX(start), "mu"),
+            "rho": theano.shared(pm.floatX(rho), "rho"),
+        }
 
     @node_property
     def symbolic_random(self):
@@ -104,16 +98,17 @@ class FullRankGroup(Group):
     MeanField approach correlations between variables are taken in account. The
     main drawback of the method is computational cost.
     """
-    __param_spec__ = dict(mu=('d',), L_tril=('int(d * (d + 1) / 2)',))
-    short_name = 'full_rank'
-    alias_names = frozenset(['fr'])
 
-    @change_flags(compute_test_value='off')
+    __param_spec__ = dict(mu=("d",), L_tril=("int(d * (d + 1) / 2)",))
+    short_name = "full_rank"
+    alias_names = frozenset(["fr"])
+
+    @change_flags(compute_test_value="off")
     def __init_group__(self, group):
         super(FullRankGroup, self).__init_group__(group)
         if not self._check_user_params():
             self.shared_params = self.create_shared_params(
-                self._kwargs.get('start', None)
+                self._kwargs.get("start", None)
             )
         self._finalize_init()
 
@@ -129,35 +124,29 @@ def create_shared_params(self, start=None):
         else:
             start = self.bij.map(start)
         n = self.ddim
-        L_tril = (
-            np.eye(n)
-            [np.tril_indices(n)]
-            .astype(theano.config.floatX)
-        )
+        L_tril = np.eye(n)[np.tril_indices(n)].astype(theano.config.floatX)
         if self.batched:
             start = np.tile(start, (self.bdim, 1))
             L_tril = np.tile(L_tril, (self.bdim, 1))
-        return {'mu': theano.shared(start, 'mu'),
-                'L_tril': theano.shared(L_tril, 'L_tril')}
+        return {
+            "mu": theano.shared(start, "mu"),
+            "L_tril": theano.shared(L_tril, "L_tril"),
+        }
 
     @node_property
     def L(self):
         if self.batched:
             L = tt.zeros((self.ddim, self.ddim, self.bdim))
-            L = tt.set_subtensor(
-                L[self.tril_indices],
-                self.params_dict['L_tril'].T)
+            L = tt.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"].T)
             L = L.dimshuffle(2, 0, 1)
         else:
             L = tt.zeros((self.ddim, self.ddim))
-            L = tt.set_subtensor(
-                L[self.tril_indices],
-                self.params_dict['L_tril'])
+            L = tt.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"])
         return L
 
     @node_property
     def mean(self):
-        return self.params_dict['mu']
+        return self.params_dict["mu"]
 
     @node_property
     def cov(self):
@@ -187,8 +176,10 @@ def tril_indices(self):
     def symbolic_logq_not_scaled(self):
         z = self.symbolic_random
         if self.batched:
+
             def logq(z_b, mu_b, L_b):
                 return pm.MvNormal.dist(mu=mu_b, chol=L_b).logp(z_b)
+
             # it's gonna be so slow
             # scan is computed over batch and then summed up
             # output shape is (batch, samples)
@@ -215,28 +206,29 @@ class EmpiricalGroup(Group):
     """Builds Approximation instance from a given trace,
     it has the same interface as variational approximation
     """
+
     supports_batched = False
     has_logq = False
-    __param_spec__ = dict(histogram=('s', 'd'))
-    short_name = 'empirical'
+    __param_spec__ = dict(histogram=("s", "d"))
+    short_name = "empirical"
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __init_group__(self, group):
         super(EmpiricalGroup, self).__init_group__(group)
         self._check_trace()
         if not self._check_user_params(spec_kw=dict(s=-1)):
             self.shared_params = self.create_shared_params(
-                trace=self._kwargs.get('trace', None),
-                size=self._kwargs.get('size', None),
-                jitter=self._kwargs.get('jitter', 1),
-                start=self._kwargs.get('start', None)
+                trace=self._kwargs.get("trace", None),
+                size=self._kwargs.get("size", None),
+                jitter=self._kwargs.get("jitter", 1),
+                start=self._kwargs.get("start", None),
             )
         self._finalize_init()
 
     def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
         if trace is None:
             if size is None:
-                raise opvi.ParametrizationError('Need `trace` or `size` to initialize')
+                raise opvi.ParametrizationError("Need `trace` or `size` to initialize")
             else:
                 if start is None:
                     start = self.model.test_point
@@ -256,14 +248,14 @@ def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
                 for j in range(len(trace)):
                     histogram[i] = self.bij.map(trace.point(j, t))
                     i += 1
-        return dict(histogram=theano.shared(pm.floatX(histogram), 'histogram'))
+        return dict(histogram=theano.shared(pm.floatX(histogram), "histogram"))
 
     def _check_trace(self):
-        trace = self._kwargs.get('trace', None)
-        if (trace is not None
-            and not all([var.name in trace.varnames
-                         for var in self.group])):
-            raise ValueError('trace has not all FreeRV in the group')
+        trace = self._kwargs.get("trace", None)
+        if trace is not None and not all(
+            [var.name in trace.varnames for var in self.group]
+        ):
+            raise ValueError("trace has not all FreeRV in the group")
 
     def randidx(self, size=None):
         if size is None:
@@ -272,16 +264,16 @@ def randidx(self, size=None):
             if size.ndim < 1:
                 size = size[None]
             elif size.ndim > 1:
-                raise ValueError('size ndim should be no more than 1d')
+                raise ValueError("size ndim should be no more than 1d")
             else:
                 pass
         else:
             size = tuple(np.atleast_1d(size))
-        return (self._rng
-                .uniform(size=size,
-                         low=pm.floatX(0),
-                         high=pm.floatX(self.histogram.shape[0]) - pm.floatX(1e-16))
-                .astype('int32'))
+        return self._rng.uniform(
+            size=size,
+            low=pm.floatX(0),
+            high=pm.floatX(self.histogram.shape[0]) - pm.floatX(1e-16),
+        ).astype("int32")
 
     def _new_initial(self, size, deterministic, more_replacements=None):
         theano_condition_is_here = isinstance(deterministic, tt.Variable)
@@ -289,14 +281,15 @@ def _new_initial(self, size, deterministic, more_replacements=None):
             return tt.switch(
                 deterministic,
                 tt.repeat(
-                    self.mean.dimshuffle('x', 0),
-                    size if size is not None else 1, -1),
-                self.histogram[self.randidx(size)])
+                    self.mean.dimshuffle("x", 0), size if size is not None else 1, -1
+                ),
+                self.histogram[self.randidx(size)],
+            )
         else:
             if deterministic:
                 return tt.repeat(
-                    self.mean.dimshuffle('x', 0),
-                    size if size is not None else 1, -1)
+                    self.mean.dimshuffle("x", 0), size if size is not None else 1, -1
+                )
             else:
                 return self.histogram[self.randidx(size)]
 
@@ -306,7 +299,7 @@ def symbolic_random(self):
 
     @property
     def histogram(self):
-        return self.params_dict['histogram']
+        return self.params_dict["histogram"]
 
     @node_property
     def mean(self):
@@ -314,7 +307,7 @@ def mean(self):
 
     @node_property
     def cov(self):
-        x = (self.histogram - self.mean)
+        x = self.histogram - self.mean
         return x.T.dot(x) / pm.floatX(self.histogram.shape[0])
 
     @node_property
@@ -323,14 +316,14 @@ def std(self):
 
     def __str__(self):
         if isinstance(self.histogram, theano.compile.SharedVariable):
-            shp = ', '.join(map(str, self.histogram.shape.eval()))
+            shp = ", ".join(map(str, self.histogram.shape.eval()))
         else:
-            shp = 'None, ' + str(self.ddim)
-        return '{cls}[{shp}]'.format(shp=shp, cls=self.__class__.__name__)
+            shp = "None, " + str(self.ddim)
+        return "{cls}[{shp}]".format(shp=shp, cls=self.__class__.__name__)
 
 
 class NormalizingFlowGroup(Group):
-    R"""Normalizing flow is a series of invertible transformations on initial distribution.
+    r"""Normalizing flow is a series of invertible transformations on initial distribution.
 
     .. math::
 
@@ -377,17 +370,17 @@ class NormalizingFlowGroup(Group):
         Improving Variational Auto-Encoders using Householder Flow
         arXiv:1611.09630
     """
-    default_flow = 'scale-loc'
+    default_flow = "scale-loc"
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __init_group__(self, group):
         super(NormalizingFlowGroup, self).__init_group__(group)
         # objects to be resolved
         # 1. string formula
         # 2. not changed default value
         # 3. Formula
-        formula = self._kwargs.get('flow', self._vfam)
-        jitter = self._kwargs.get('jitter', 1)
+        formula = self._kwargs.get("flow", self._vfam)
+        jitter = self._kwargs.get("jitter", 1)
         if formula is None or isinstance(formula, str):
             # case 1 and 2
             has_params = self._check_user_params(f=formula)
@@ -395,13 +388,15 @@ def __init_group__(self, group):
             # case 3
             has_params = self._check_user_params(f=formula.formula)
         else:
-            raise TypeError('Wrong type provided for NormalizingFlow as `flow` argument, '
-                            'expected Formula or string')
+            raise TypeError(
+                "Wrong type provided for NormalizingFlow as `flow` argument, "
+                "expected Formula or string"
+            )
         if not has_params:
             if formula is None:
                 formula = self.default_flow
         else:
-            formula = '-'.join(
+            formula = "-".join(
                 flows.flow_for_params(self.user_params[i]).short_name
                 for i in range(len(self.user_params))
             )
@@ -424,31 +419,38 @@ def __init_group__(self, group):
 
     def _check_user_params(self, **kwargs):
         params = self._user_params = self.user_params
-        formula = kwargs.pop('f')
+        formula = kwargs.pop("f")
         if params is None:
             return False
         if formula is not None:
-            raise opvi.ParametrizationError('No formula is allowed if user params are provided')
+            raise opvi.ParametrizationError(
+                "No formula is allowed if user params are provided"
+            )
         if not isinstance(params, dict):
-            raise TypeError('params should be a dict')
+            raise TypeError("params should be a dict")
         if not all(isinstance(k, int) for k in params.keys()):
-            raise TypeError('params should be a dict with `int` keys')
+            raise TypeError("params should be a dict with `int` keys")
         needed = set(range(len(params)))
         givens = set(params.keys())
         if givens != needed:
             raise opvi.ParametrizationError(
-                'Passed parameters do not have a needed set of keys, '
-                'they should be equal, needed {needed}, got {givens}'.format(
-                    givens=list(sorted(givens)), needed='[0, 1, ..., %d]' % len(formula.flows)))
+                "Passed parameters do not have a needed set of keys, "
+                "they should be equal, needed {needed}, got {givens}".format(
+                    givens=list(sorted(givens)),
+                    needed="[0, 1, ..., %d]" % len(formula.flows),
+                )
+            )
         for i in needed:
             flow = flows.flow_for_params(params[i])
             flow_keys = set(flow.__param_spec__)
             user_keys = set(params[i].keys())
             if flow_keys != user_keys:
                 raise opvi.ParametrizationError(
-                    'Passed parameters for flow `{i}` ({cls}) do not have a needed set of keys, '
-                    'they should be equal, needed {needed}, got {givens}'.format(
-                        givens=user_keys, needed=flow_keys, i=i, cls=flow.__name__))
+                    "Passed parameters for flow `{i}` ({cls}) do not have a needed set of keys, "
+                    "they should be equal, needed {needed}, got {givens}".format(
+                        givens=user_keys, needed=flow_keys, i=i, cls=flow.__name__
+                    )
+                )
         return True
 
     @property
@@ -468,7 +470,7 @@ def shared_params(self):
     @shared_params.setter
     def shared_params(self, value):
         if self.user_params is not None:
-            raise AttributeError('Cannot set when having user params')
+            raise AttributeError("Cannot set when having user params")
         current = self.flow
         i = 0
         current.shared_params = value[i]
@@ -485,7 +487,7 @@ def params(self):
     def symbolic_logq_not_scaled(self):
         z0 = self.symbolic_initial
         q0 = pm.Normal.dist().logp(z0).sum(range(1, z0.ndim))
-        return q0-self.flow.sum_logdets
+        return q0 - self.flow.sum_logdets
 
     @property
     def symbolic_random(self):
@@ -526,15 +528,22 @@ def sample_approx(approx, draws=100, include_transformed=True):
 # single group shortcuts exported to user
 class SingleGroupApproximation(Approximation):
     """Base class for Single Group Approximation"""
+
     _group_class = None
 
     def __init__(self, *args, **kwargs):
-        local_rv = kwargs.get('local_rv')
+        local_rv = kwargs.get("local_rv")
         groups = [self._group_class(None, *args, **kwargs)]
         if local_rv is not None:
-            groups.extend([Group([v], params=p, local=True, model=kwargs.get('model'))
-                           for v, p in local_rv.items()])
-        super(SingleGroupApproximation, self).__init__(groups, model=kwargs.get('model'))
+            groups.extend(
+                [
+                    Group([v], params=p, local=True, model=kwargs.get("model"))
+                    for v, p in local_rv.items()
+                ]
+            )
+        super(SingleGroupApproximation, self).__init__(
+            groups, model=kwargs.get("model")
+        )
 
     def __getattr__(self, item):
         return getattr(self.groups[0], item)
@@ -548,35 +557,45 @@ def __dir__(self):
 class MeanField(SingleGroupApproximation):
     __doc__ = """**Single Group Mean Field Approximation**
 
-    """ + str(MeanFieldGroup.__doc__)
+    """ + str(
+        MeanFieldGroup.__doc__
+    )
     _group_class = MeanFieldGroup
 
 
 class FullRank(SingleGroupApproximation):
     __doc__ = """**Single Group Full Rank Approximation**
 
-    """ + str(FullRankGroup.__doc__)
+    """ + str(
+        FullRankGroup.__doc__
+    )
     _group_class = FullRankGroup
 
 
 class Empirical(SingleGroupApproximation):
     __doc__ = """**Single Group Full Rank Approximation**
 
-    """ + str(EmpiricalGroup.__doc__)
+    """ + str(
+        EmpiricalGroup.__doc__
+    )
     _group_class = EmpiricalGroup
 
     def __init__(self, trace=None, size=None, **kwargs):
-        if kwargs.get('local_rv', None) is not None:
-            raise opvi.LocalGroupError('Empirical approximation does not support local variables')
+        if kwargs.get("local_rv", None) is not None:
+            raise opvi.LocalGroupError(
+                "Empirical approximation does not support local variables"
+            )
         super(Empirical, self).__init__(trace=trace, size=size, **kwargs)
 
 
 class NormalizingFlow(SingleGroupApproximation):
     __doc__ = """**Single Group Normalizing Flow Approximation**
 
-    """ + str(NormalizingFlowGroup.__doc__)
+    """ + str(
+        NormalizingFlowGroup.__doc__
+    )
     _group_class = NormalizingFlowGroup
 
     def __init__(self, flow=NormalizingFlowGroup.default_flow, *args, **kwargs):
-        kwargs['flow'] = flow
+        kwargs["flow"] = flow
         super(NormalizingFlow, self).__init__(*args, **kwargs)
diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py
index 748062e777..057564eeaf 100644
--- a/pymc3/variational/callbacks.py
+++ b/pymc3/variational/callbacks.py
@@ -2,11 +2,7 @@
 
 import numpy as np
 
-__all__ = [
-    'Callback',
-    'CheckParametersConvergence',
-    'Tracker'
-]
+__all__ = ["Callback", "CheckParametersConvergence", "Tracker"]
 
 
 class Callback(object):
@@ -22,10 +18,7 @@ def absolute(current, prev):
     return np.abs(current - prev)
 
 
-_diff = dict(
-    relative=relative,
-    absolute=absolute
-)
+_diff = dict(relative=relative, absolute=absolute)
 
 
 class CheckParametersConvergence(Callback):
@@ -54,8 +47,7 @@ class CheckParametersConvergence(Callback):
     ...     )
     """
 
-    def __init__(self, every=100, tolerance=1e-3,
-                 diff='relative', ord=np.inf):
+    def __init__(self, every=100, tolerance=1e-3, diff="relative", ord=np.inf):
         self._diff = _diff[diff]
         self.ord = ord
         self.every = every
@@ -74,7 +66,7 @@ def __call__(self, approx, _, i):
         self.prev = current
         norm = np.linalg.norm(delta, self.ord)
         if norm < self.tolerance:
-            raise StopIteration('Convergence achieved at %d' % i)
+            raise StopIteration("Convergence achieved at %d" % i)
 
     @staticmethod
     def flatten_shared(shared_list):
@@ -115,6 +107,7 @@ class Tracker(Callback):
     ...     tracker = Tracker(some_stat=my_callable, time=time.time)
     ...     approx = pm.fit(callbacks=[tracker])
     """
+
     def __init__(self, **kwargs):
         self.whatchdict = kwargs
         self.hist = collections.defaultdict(list)
diff --git a/pymc3/variational/flows.py b/pymc3/variational/flows.py
index 13a88a6df0..2cb5050502 100644
--- a/pymc3/variational/flows.py
+++ b/pymc3/variational/flows.py
@@ -9,12 +9,12 @@
 from . import opvi
 
 __all__ = [
-    'Formula',
-    'PlanarFlow',
-    'HouseholderFlow',
-    'RadialFlow',
-    'LocFlow',
-    'ScaleFlow'
+    "Formula",
+    "PlanarFlow",
+    "HouseholderFlow",
+    "RadialFlow",
+    "LocFlow",
+    "ScaleFlow",
 ]
 
 
@@ -39,9 +39,9 @@ class Formula(object):
     """
 
     def __init__(self, formula):
-        identifiers = formula.lower().replace(' ', '').split('-')
-        self.formula = '-'.join(identifiers)
-        identifiers = [idf.split('*') for idf in identifiers]
+        identifiers = formula.lower().replace(" ", "").split("-")
+        self.formula = "-".join(identifiers)
+        identifiers = [idf.split("*") for idf in identifiers]
         self.flows = []
 
         for tup in identifiers:
@@ -50,25 +50,31 @@ def __init__(self, formula):
             elif len(tup) == 2:
                 self.flows.extend([flow_for_short_name(tup[0])] * int(tup[1]))
             else:
-                raise ValueError('Wrong format: %s' % formula)
+                raise ValueError("Wrong format: %s" % formula)
         if len(self.flows) == 0:
-            raise ValueError('No flows in formula')
+            raise ValueError("No flows in formula")
 
-    def __call__(self, z0=None, dim=None, jitter=.001, params=None, batch_size=None):
+    def __call__(self, z0=None, dim=None, jitter=0.001, params=None, batch_size=None):
         if len(self.flows) == 0:
-            raise ValueError('No flows in formula')
+            raise ValueError("No flows in formula")
         if params is None:
             params = dict()
         flow = z0
         for i, flow_cls in enumerate(self.flows):
-            flow = flow_cls(dim=dim, jitter=jitter, z0=flow, batch_size=batch_size, **params.get(i, {}))
+            flow = flow_cls(
+                dim=dim,
+                jitter=jitter,
+                z0=flow,
+                batch_size=batch_size,
+                **params.get(i, {})
+            )
         return flow
 
     def __reduce__(self):
         return self.__class__, self.formula
 
     def __latex__(self):
-        return r'Formula{\mathcal{N}(0, 1) -> %s}' % self.formula
+        return r"Formula{\mathcal{N}(0, 1) -> %s}" % self.formula
 
     __repr__ = _latex_repr_ = __latex__
 
@@ -103,35 +109,43 @@ def seems_like_flow_params(params):
 class AbstractFlow(WithMemoization):
     shared_params = None
     __param_spec__ = dict()
-    short_name = ''
+    short_name = ""
     __param_registry = dict()
     __name_registry = dict()
 
     @classmethod
     def register(cls, sbcls):
-        assert frozenset(sbcls.__param_spec__) not in cls.__param_registry, 'Duplicate __param_spec__'
+        assert (
+            frozenset(sbcls.__param_spec__) not in cls.__param_registry
+        ), "Duplicate __param_spec__"
         cls.__param_registry[frozenset(sbcls.__param_spec__)] = sbcls
-        assert sbcls.short_name not in cls.__name_registry, 'Duplicate short_name'
+        assert sbcls.short_name not in cls.__name_registry, "Duplicate short_name"
         cls.__name_registry[sbcls.short_name] = sbcls
         return sbcls
 
     @classmethod
     def flow_for_params(cls, params):
         if frozenset(params) not in cls.__param_registry:
-            raise KeyError('No such flow for the following params: {!r}, '
-                           'only the following are supported\n\n{}'
-                           .format(params, cls.__param_registry))
+            raise KeyError(
+                "No such flow for the following params: {!r}, "
+                "only the following are supported\n\n{}".format(
+                    params, cls.__param_registry
+                )
+            )
         return cls.__param_registry[frozenset(params)]
 
     @classmethod
     def flow_for_short_name(cls, name):
         if name.lower() not in cls.__name_registry:
-            raise KeyError('No such flow: {!r}, '
-                           'only the following are supported\n\n{}'
-                           .format(name, cls.__name_registry))
+            raise KeyError(
+                "No such flow: {!r}, "
+                "only the following are supported\n\n{}".format(
+                    name, cls.__name_registry
+                )
+            )
         return cls.__name_registry[name.lower()]
 
-    def __init__(self, z0=None, dim=None, jitter=.001, batch_size=None, local=False):
+    def __init__(self, z0=None, dim=None, jitter=0.001, batch_size=None, local=False):
         self.local = local
         self.batch_size = batch_size
         self.__jitter = jitter
@@ -144,29 +158,35 @@ def __init__(self, z0=None, dim=None, jitter=.001, batch_size=None, local=False)
         if dim is not None:
             self.dim = dim
         else:
-            raise ValueError('Cannot infer dimension of flow, '
-                             'please provide dim or Flow instance as z0')
+            raise ValueError(
+                "Cannot infer dimension of flow, "
+                "please provide dim or Flow instance as z0"
+            )
         if z0 is None:
             self.z0 = tt.matrix()  # type: tt.TensorVariable
         else:
             self.z0 = tt.as_tensor(z0)
         self.parent = parent
 
-    def add_param(self, user=None, name=None, ref=0., dtype='floatX'):
-        if dtype == 'floatX':
+    def add_param(self, user=None, name=None, ref=0.0, dtype="floatX"):
+        if dtype == "floatX":
             dtype = theano.config.floatX
         spec = self.__param_spec__[name]
-        shape = tuple(eval(s, {'d': self.dim}) for s in spec)
+        shape = tuple(eval(s, {"d": self.dim}) for s in spec)
         if user is None:
             if self.local:
-                raise opvi.LocalGroupError('Need parameters for local group flow')
+                raise opvi.LocalGroupError("Need parameters for local group flow")
             if self.batched:
                 if self.batch_size is None:
-                    raise opvi.BatchedGroupError('Need batch size to infer parameter shape')
+                    raise opvi.BatchedGroupError(
+                        "Need batch size to infer parameter shape"
+                    )
                 shape = (self.batch_size,) + shape
             return theano.shared(
-                np.asarray(np.random.normal(size=shape) * self.__jitter + ref).astype(dtype),
-                name=name
+                np.asarray(np.random.normal(size=shape) * self.__jitter + ref).astype(
+                    dtype
+                ),
+                name=name,
             )
 
         else:
@@ -191,7 +211,7 @@ def all_params(self):
         return params
 
     @property
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def sum_logdets(self):
         dets = [self.logdet]
         current = self
@@ -208,13 +228,13 @@ def forward(self):
     def logdet(self):
         raise NotImplementedError
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def forward_pass(self, z0):
         ret = theano.clone(self.forward, {self.root.z0: z0})
         try:
-            ret.tag.test_value = np.random.normal(
-                size=z0.tag.test_value.shape
-            ).astype(self.z0.dtype)
+            ret.tag.test_value = np.random.normal(size=z0.tag.test_value.shape).astype(
+                self.z0.dtype
+            )
         except AttributeError:
             ret.tag.test_value = self.root.z0.tag.test_value
         return ret
@@ -234,7 +254,7 @@ def formula(self):
         current = self
         while not current.isroot:
             current = current.parent
-            f = current.short_name + '-' + f
+            f = current.short_name + "-" + f
         return f
 
     @property
@@ -253,7 +273,7 @@ def get_param_spec_for(cls, **kwargs):
         return res
 
     def __repr__(self):
-        return 'Flow{%s}' % self.short_name
+        return "Flow{%s}" % self.short_name
 
     def __str__(self):
         return self.short_name
@@ -281,49 +301,49 @@ def __call__(self, *args):
 
 
 class LinearFlow(AbstractFlow):
-    __param_spec__ = dict(u=('d', ), w=('d', ), b=())
+    __param_spec__ = dict(u=("d",), w=("d",), b=())
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __init__(self, h, u=None, w=None, b=None, **kwargs):
         self.h = h
         super(LinearFlow, self).__init__(**kwargs)
-        u = self.add_param(u, 'u')
-        w = self.add_param(w, 'w')
-        b = self.add_param(b, 'b')
+        u = self.add_param(u, "u")
+        w = self.add_param(w, "w")
+        b = self.add_param(b, "b")
         self.shared_params = dict(u=u, w=w, b=b)
         self.u_, self.w_ = self.make_uw(self.u, self.w)
 
-    u = property(lambda self: self.shared_params['u'])
-    w = property(lambda self: self.shared_params['w'])
-    b = property(lambda self: self.shared_params['b'])
+    u = property(lambda self: self.shared_params["u"])
+    w = property(lambda self: self.shared_params["w"])
+    b = property(lambda self: self.shared_params["b"])
 
     def make_uw(self, u, w):
-        raise NotImplementedError('Need to implement valid U, W transform')
+        raise NotImplementedError("Need to implement valid U, W transform")
 
     @node_property
     def forward(self):
         z = self.z0  # sxd
-        u = self.u_   # d
-        w = self.w_   # d
-        b = self.b   # .
-        h = self.h   # f
+        u = self.u_  # d
+        w = self.w_  # d
+        b = self.b  # .
+        h = self.h  # f
         # h(sxd \dot d + .)  = s
         if not self.batched:
             hwz = h(z.dot(w) + b)  # s
             # sxd + (s \outer d) = sxd
-            z1 = z + tt.outer(hwz,  u)  # sxd
+            z1 = z + tt.outer(hwz, u)  # sxd
             return z1
         else:
             z = z.swapaxes(0, 1)
             # z bxsxd
             # u bxd
             # w bxd
-            b = b.dimshuffle(0, 'x')
+            b = b.dimshuffle(0, "x")
             # b bx-
             hwz = h(tt.batched_dot(z, w) + b)  # bxs
             # bxsxd + (bxsx- * bx-xd) = bxsxd
-            hwz = hwz.dimshuffle(0, 1, 'x')  # bxsx-
-            u = u.dimshuffle(0, 'x', 1)  # bx-xd
+            hwz = hwz.dimshuffle(0, 1, "x")  # bxsx-
+            u = u.dimshuffle(0, "x", 1)  # bx-xd
             z1 = z + hwz * u  # bxsxd
             return z1.swapaxes(0, 1)  # sxbxd
 
@@ -336,21 +356,23 @@ def logdet(self):
         deriv = self.h.deriv  # f'
         if not self.batched:
             # f'(sxd \dot d + .) * -xd = sxd
-            phi = deriv(z.dot(w) + b).dimshuffle(0, 'x') * w.dimshuffle('x', 0)
+            phi = deriv(z.dot(w) + b).dimshuffle(0, "x") * w.dimshuffle("x", 0)
             # \abs(. + sxd \dot d) = s
-            det = tt.abs_(1. + phi.dot(u))
+            det = tt.abs_(1.0 + phi.dot(u))
             return tt.log(det)
         else:
             z = z.swapaxes(0, 1)
-            b = b.dimshuffle(0, 'x')
+            b = b.dimshuffle(0, "x")
             # z bxsxd
             # u bxd
             # w bxd
             # b bx-x-
             # f'(bxsxd \bdot bxd + bx-x-) * bx-xd = bxsxd
-            phi = deriv(tt.batched_dot(z, w) + b).dimshuffle(0, 1, 'x') * w.dimshuffle(0, 'x', 1)
+            phi = deriv(tt.batched_dot(z, w) + b).dimshuffle(0, 1, "x") * w.dimshuffle(
+                0, "x", 1
+            )
             # \abs(. + bxsxd \bdot bxd) = bxs
-            det = tt.abs_(1. + tt.batched_dot(phi, u))  # bxs
+            det = tt.abs_(1.0 + tt.batched_dot(phi, u))  # bxs
             return tt.log(det).sum(0)  # s
 
 
@@ -361,12 +383,12 @@ class Tanh(FlowFn):
     @staticmethod
     def deriv(*args):
         x, = args
-        return 1. - tt.tanh(x) ** 2
+        return 1.0 - tt.tanh(x) ** 2
 
 
 @AbstractFlow.register
 class PlanarFlow(LinearFlow):
-    short_name = 'planar'
+    short_name = "planar"
 
     def __init__(self, **kwargs):
         super(PlanarFlow, self).__init__(h=Tanh(), **kwargs)
@@ -376,55 +398,44 @@ def make_uw(self, u, w):
             # u_ : d
             # w_ : d
             wu = u.dot(w)  # .
-            mwu = -1. + tt.nnet.softplus(wu)  # .
+            mwu = -1.0 + tt.nnet.softplus(wu)  # .
             # d + (. - .) * d / .
-            u_h = (
-                u+(mwu-wu) *
-                w/((w**2).sum()+1e-10)
-            )
+            u_h = u + (mwu - wu) * w / ((w ** 2).sum() + 1e-10)
             return u_h, w
         else:
             # u_ : bxd
             # w_ : bxd
-            wu = (u*w).sum(-1, keepdims=True)  # bx-
-            mwu = -1. + tt.nnet.softplus(wu)  # bx-
+            wu = (u * w).sum(-1, keepdims=True)  # bx-
+            mwu = -1.0 + tt.nnet.softplus(wu)  # bx-
             # bxd + (bx- - bx-) * bxd / bx- = bxd
-            u_h = (
-                u
-                + (mwu - wu)
-                * w / ((w ** 2).sum(-1, keepdims=True) + 1e-10)
-            )
+            u_h = u + (mwu - wu) * w / ((w ** 2).sum(-1, keepdims=True) + 1e-10)
             return u_h, w
 
 
 class ReferencePointFlow(AbstractFlow):
-    __param_spec__ = dict(a=(), b=(), z_ref=('d', ))
+    __param_spec__ = dict(a=(), b=(), z_ref=("d",))
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __init__(self, h, a=None, b=None, z_ref=None, **kwargs):
         super(ReferencePointFlow, self).__init__(**kwargs)
-        a = self.add_param(a, 'a')
-        b = self.add_param(b, 'b')
-        if hasattr(self.z0, 'tag') and hasattr(self.z0.tag, 'test_value'):
+        a = self.add_param(a, "a")
+        b = self.add_param(b, "b")
+        if hasattr(self.z0, "tag") and hasattr(self.z0.tag, "test_value"):
             z_ref = self.add_param(
-                z_ref, 'z_ref',
-                ref=self.z0.tag.test_value[0],
-                dtype=self.z0.dtype
+                z_ref, "z_ref", ref=self.z0.tag.test_value[0], dtype=self.z0.dtype
             )
         else:
-            z_ref = self.add_param(
-                z_ref, 'z_ref', dtype=self.z0.dtype
-            )
+            z_ref = self.add_param(z_ref, "z_ref", dtype=self.z0.dtype)
         self.h = h
         self.shared_params = dict(a=a, b=b, z_ref=z_ref)
         self.a_, self.b_ = self.make_ab(self.a, self.b)
 
-    a = property(lambda self: self.shared_params['a'])
-    b = property(lambda self: self.shared_params['b'])
-    z_ref = property(lambda self: self.shared_params['z_ref'])
+    a = property(lambda self: self.shared_params["a"])
+    b = property(lambda self: self.shared_params["b"])
+    z_ref = property(lambda self: self.shared_params["z_ref"])
 
     def make_ab(self, a, b):
-        raise NotImplementedError('Need to specify how to get a, b')
+        raise NotImplementedError("Need to specify how to get a, b")
 
     @node_property
     def forward(self):
@@ -439,13 +450,13 @@ def forward(self):
             # z bxsxd
             # z_ref bx-xd
             z = z.swapaxes(0, 1)
-            a = a.dimshuffle(0, 'x', 'x')
-            b = b.dimshuffle(0, 'x', 'x')
-            z_ref = z_ref.dimshuffle(0, 'x', 1)
+            a = a.dimshuffle(0, "x", "x")
+            b = b.dimshuffle(0, "x", "x")
+            z_ref = z_ref.dimshuffle(0, "x", 1)
         r = (z - z_ref).norm(2, axis=-1, keepdims=True)  # sx- (bxsx-)
         # global: sxd + . * h(., sx-) * (sxd - sxd) = sxd
         # local: bxsxd + b * h(b, bxsx-) * (bxsxd - bxsxd) = bxsxd
-        z1 = z + b * h(a, r) * (z-z_ref)
+        z1 = z + b * h(a, r) * (z - z_ref)
         if self.batched:
             z1 = z1.swapaxes(0, 1)
         return z1
@@ -461,9 +472,9 @@ def logdet(self):
         deriv = self.h.deriv  # h'(a, r)
         if self.batched:
             z = z.swapaxes(0, 1)
-            a = a.dimshuffle(0, 'x', 'x')
-            b = b.dimshuffle(0, 'x', 'x')
-            z_ref = z_ref.dimshuffle(0, 'x', 1)
+            a = a.dimshuffle(0, "x", "x")
+            b = b.dimshuffle(0, "x", "x")
+            z_ref = z_ref.dimshuffle(0, "x", 1)
             # a bx-x-
             # b bx-x-
             # z bxsxd
@@ -471,7 +482,7 @@ def logdet(self):
         r = (z - z_ref).norm(2, axis=-1, keepdims=True)  # s
         har = h(a, r)
         dar = deriv(a, r)
-        logdet = tt.log((1. + b*har)**(d-1.) * (1. + b*har + b*dar*r))
+        logdet = tt.log((1.0 + b * har) ** (d - 1.0) * (1.0 + b * har + b * dar * r))
         if self.batched:
             return logdet.sum([0, -1])
         else:
@@ -482,22 +493,22 @@ class Radial(FlowFn):
     @staticmethod
     def fn(*args):
         a, r = args
-        return 1./(a+r)
+        return 1.0 / (a + r)
 
     @staticmethod
     def inv(*args):
         a, y = args
-        return 1./y - a
+        return 1.0 / y - a
 
     @staticmethod
     def deriv(*args):
         a, r = args
-        return -1. / (a + r) ** 2
+        return -1.0 / (a + r) ** 2
 
 
 @AbstractFlow.register
 class RadialFlow(ReferencePointFlow):
-    short_name = 'radial'
+    short_name = "radial"
 
     def __init__(self, **kwargs):
         super(RadialFlow, self).__init__(Radial(), **kwargs)
@@ -510,15 +521,15 @@ def make_ab(self, a, b):
 
 @AbstractFlow.register
 class LocFlow(AbstractFlow):
-    __param_spec__ = dict(loc=('d', ))
-    short_name = 'loc'
+    __param_spec__ = dict(loc=("d",))
+    short_name = "loc"
 
     def __init__(self, loc=None, **kwargs):
         super(LocFlow, self).__init__(**kwargs)
-        loc = self.add_param(loc, 'loc')
+        loc = self.add_param(loc, "loc")
         self.shared_params = dict(loc=loc)
 
-    loc = property(lambda self: self.shared_params['loc'])
+    loc = property(lambda self: self.shared_params["loc"])
 
     @node_property
     def forward(self):
@@ -533,17 +544,17 @@ def logdet(self):
 
 @AbstractFlow.register
 class ScaleFlow(AbstractFlow):
-    __param_spec__ = dict(rho=('d', ))
-    short_name = 'scale'
+    __param_spec__ = dict(rho=("d",))
+    short_name = "scale"
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __init__(self, rho=None, **kwargs):
         super(ScaleFlow, self).__init__(**kwargs)
-        rho = self.add_param(rho, 'rho')
+        rho = self.add_param(rho, "rho")
         self.scale = rho2sd(rho)
         self.shared_params = dict(rho=rho)
 
-    log_scale = property(lambda self: self.shared_params['log_scale'])
+    log_scale = property(lambda self: self.shared_params["log_scale"])
 
     @node_property
     def forward(self):
@@ -558,28 +569,28 @@ def logdet(self):
 
 @AbstractFlow.register
 class HouseholderFlow(AbstractFlow):
-    __param_spec__ = dict(v=('d', ))
-    short_name = 'hh'
+    __param_spec__ = dict(v=("d",))
+    short_name = "hh"
 
-    @change_flags(compute_test_value='raise')
+    @change_flags(compute_test_value="raise")
     def __init__(self, v=None, **kwargs):
         super(HouseholderFlow, self).__init__(**kwargs)
-        v = self.add_param(v, 'v')
+        v = self.add_param(v, "v")
         self.shared_params = dict(v=v)
         if self.batched:
-            vv = v.dimshuffle(0, 1, 'x') * v.dimshuffle(0, 'x', 1)
-            I = tt.eye(self.dim).dimshuffle('x', 0, 1)
-            vvn = (1e-10+(v**2).sum(-1)).dimshuffle(0, 'x', 'x')
+            vv = v.dimshuffle(0, 1, "x") * v.dimshuffle(0, "x", 1)
+            I = tt.eye(self.dim).dimshuffle("x", 0, 1)
+            vvn = (1e-10 + (v ** 2).sum(-1)).dimshuffle(0, "x", "x")
         else:
             vv = tt.outer(v, v)
             I = tt.eye(self.dim)
-            vvn = ((v**2).sum(-1)+1e-10)
-        self.H = I - 2. * vv / vvn
+            vvn = (v ** 2).sum(-1) + 1e-10
+        self.H = I - 2.0 * vv / vvn
 
     @node_property
     def forward(self):
         z = self.z0  # sxd
-        H = self.H   # dxd
+        H = self.H  # dxd
         if self.batched:
             return tt.batched_dot(z.swapaxes(0, 1), H).swapaxes(0, 1)
         else:
diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py
index 66a2198bf2..15f44fae57 100644
--- a/pymc3/variational/inference.py
+++ b/pymc3/variational/inference.py
@@ -10,7 +10,10 @@
 import pymc3 as pm
 from pymc3.variational import test_functions
 from pymc3.variational.approximations import (
-    MeanField, FullRank, Empirical, NormalizingFlow
+    MeanField,
+    FullRank,
+    Empirical,
+    NormalizingFlow,
 )
 from pymc3.variational.operators import KL, KSD
 from . import opvi
@@ -18,22 +21,22 @@
 logger = logging.getLogger(__name__)
 
 __all__ = [
-    'ADVI',
-    'FullRankADVI',
-    'SVGD',
-    'ASVGD',
-    'NFVI',
-    'Inference',
-    'ImplicitGradient',
-    'KLqp',
-    'fit'
+    "ADVI",
+    "FullRankADVI",
+    "SVGD",
+    "ASVGD",
+    "NFVI",
+    "Inference",
+    "ImplicitGradient",
+    "KLqp",
+    "fit",
 ]
 
-State = collections.namedtuple('State', 'i,step,callbacks,score')
+State = collections.namedtuple("State", "i,step,callbacks,score")
 
 
 class Inference(object):
-    R"""**Base class for Variational Inference**
+    r"""**Base class for Variational Inference**
 
     Communicates Operator, Approximation and Test Function to build Objective Function
 
@@ -59,9 +62,10 @@ def _maybe_score(self, score):
         if score is None:
             score = returns_loss
         elif score and not returns_loss:
-            warnings.warn('method `fit` got `score == True` but %s '
-                          'does not return loss. Ignoring `score` argument'
-                          % self.objective.op)
+            warnings.warn(
+                "method `fit` got `score == True` but %s "
+                "does not return loss. Ignoring `score` argument" % self.objective.op
+            )
             score = False
         else:
             pass
@@ -69,11 +73,10 @@ def _maybe_score(self, score):
 
     def run_profiling(self, n=1000, score=None, **kwargs):
         score = self._maybe_score(score)
-        fn_kwargs = kwargs.pop('fn_kwargs', dict())
-        fn_kwargs['profile'] = True
+        fn_kwargs = kwargs.pop("fn_kwargs", dict())
+        fn_kwargs["profile"] = True
         step_func = self.objective.step_function(
-            score=score, fn_kwargs=fn_kwargs,
-            **kwargs
+            score=score, fn_kwargs=fn_kwargs, **kwargs
         )
         progress = tqdm.trange(n)
         try:
@@ -85,8 +88,7 @@ def run_profiling(self, n=1000, score=None, **kwargs):
             progress.close()
         return step_func.profile
 
-    def fit(self, n=10000, score=None, callbacks=None, progressbar=True,
-            **kwargs):
+    def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs):
         """Perform Operator Variational Inference
 
         Parameters
@@ -158,37 +160,40 @@ def _iterate_without_loss(self, s, _, step_func, progress, callbacks):
                         for i in range(slclen):
                             name_slc.append((vmap_.var, i))
                     index = np.where(np.isnan(current_param))[0]
-                    errmsg = ['NaN occurred in optimization. ']
-                    suggest_solution = 'Try tracking this parameter: ' \
-                                       'http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters'
+                    errmsg = ["NaN occurred in optimization. "]
+                    suggest_solution = (
+                        "Try tracking this parameter: "
+                        "http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters"
+                    )
                     try:
                         for ii in index:
-                            errmsg.append('The current approximation of RV `{}`.ravel()[{}]'
-                                          ' is NaN.'.format(*name_slc[ii]))
+                            errmsg.append(
+                                "The current approximation of RV `{}`.ravel()[{}]"
+                                " is NaN.".format(*name_slc[ii])
+                            )
                         errmsg.append(suggest_solution)
                     except IndexError:
                         pass
-                    raise FloatingPointError('\n'.join(errmsg))
+                    raise FloatingPointError("\n".join(errmsg))
                 for callback in callbacks:
-                    callback(self.approx, None, i+s+1)
+                    callback(self.approx, None, i + s + 1)
         except (KeyboardInterrupt, StopIteration) as e:
             progress.close()
             if isinstance(e, StopIteration):
                 logger.info(str(e))
         finally:
             progress.close()
-        return State(i+s, step=step_func,
-                     callbacks=callbacks,
-                     score=False)
+        return State(i + s, step=step_func, callbacks=callbacks, score=False)
 
     def _iterate_with_loss(self, s, n, step_func, progress, callbacks):
         def _infmean(input_array):
             """Return the mean of the finite values of the array"""
-            input_array = input_array[np.isfinite(input_array)].astype('float64')
+            input_array = input_array[np.isfinite(input_array)].astype("float64")
             if len(input_array) == 0:
                 return np.nan
             else:
                 return np.mean(input_array)
+
         scores = np.empty(n)
         scores[:] = np.nan
         i = 0
@@ -207,26 +212,29 @@ def _infmean(input_array):
                         for i in range(slclen):
                             name_slc.append((vmap_.var, i))
                     index = np.where(np.isnan(current_param))[0]
-                    errmsg = ['NaN occurred in optimization. ']
-                    suggest_solution = 'Try tracking this parameter: ' \
-                                       'http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters'
+                    errmsg = ["NaN occurred in optimization. "]
+                    suggest_solution = (
+                        "Try tracking this parameter: "
+                        "http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters"
+                    )
                     try:
                         for ii in index:
-                            errmsg.append('The current approximation of RV `{}`.ravel()[{}]'
-                                          ' is NaN.'.format(*name_slc[ii]))
+                            errmsg.append(
+                                "The current approximation of RV `{}`.ravel()[{}]"
+                                " is NaN.".format(*name_slc[ii])
+                            )
                         errmsg.append(suggest_solution)
                     except IndexError:
                         pass
-                    raise FloatingPointError('\n'.join(errmsg))
+                    raise FloatingPointError("\n".join(errmsg))
                 scores[i] = e
                 if i % 10 == 0:
-                    avg_loss = _infmean(scores[max(0, i - 1000):i + 1])
-                    progress.set_description('Average Loss = {:,.5g}'.format(avg_loss))
-                    avg_loss = scores[max(0, i - 1000):i + 1].mean()
-                    progress.set_description(
-                        'Average Loss = {:,.5g}'.format(avg_loss))
+                    # Finite-only mean of the trailing window: a plain .mean()
+                    # would turn the description into nan/inf on one bad score.
+                    avg_loss = _infmean(scores[max(0, i - 1000) : i + 1])
+                    progress.set_description("Average Loss = {:,.5g}".format(avg_loss))
                 for callback in callbacks:
-                    callback(self.approx, scores[:i + 1], i+s+1)
+                    callback(self.approx, scores[: i + 1], i + s + 1)
         except (KeyboardInterrupt, StopIteration) as e:  # pragma: no cover
             # do not print log on the same line
             progress.close()
@@ -234,32 +242,34 @@ def _infmean(input_array):
             if isinstance(e, StopIteration):
                 logger.info(str(e))
             if n < 10:
-                logger.info('Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}'.format(
-                    i, 100 * i // n, scores[i]))
+                logger.info(
+                    "Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}".format(
+                        i, 100 * i // n, scores[i]
+                    )
+                )
             else:
-                avg_loss = _infmean(scores[min(0, i - 1000):i + 1])
-                logger.info('Interrupted at {:,d} [{:.0f}%]: Average Loss = {:,.5g}'.format(
-                    i, 100 * i // n, avg_loss))
+                avg_loss = _infmean(scores[max(0, i - 1000) : i + 1])  # trailing window
+                logger.info(
+                    "Interrupted at {:,d} [{:.0f}%]: Average Loss = {:,.5g}".format(
+                        i, 100 * i // n, avg_loss
+                    )
+                )
         else:
             if n < 10:
-                logger.info(
-                    'Finished [100%]: Loss = {:,.5g}'.format(scores[-1]))
+                logger.info("Finished [100%]: Loss = {:,.5g}".format(scores[-1]))
             else:
-                avg_loss = _infmean(scores[max(0, i - 1000):i + 1])
-                logger.info(
-                    'Finished [100%]: Average Loss = {:,.5g}'.format(avg_loss))
+                avg_loss = _infmean(scores[max(0, i - 1000) : i + 1])
+                logger.info("Finished [100%]: Average Loss = {:,.5g}".format(avg_loss))
         finally:
             progress.close()
         self.hist = np.concatenate([self.hist, scores])
-        return State(i+s, step=step_func,
-                     callbacks=callbacks,
-                     score=True)
+        return State(i + s, step=step_func, callbacks=callbacks, score=True)
 
     def refine(self, n, progressbar=True):
         """Refine the solution using the last compiled step function
         """
         if self.state is None:
-            raise TypeError('Need to call `.fit` first')
+            raise TypeError("Need to call `.fit` first")
         i, step, callbacks, score = self.state
         with tqdm.trange(n, disable=not progressbar) as progress:
             if score:
@@ -293,12 +303,13 @@ class KLqp(Inference):
         Understanding disentangling in :math:`\beta`-VAE
         arXiv preprint 1804.03599
     """
-    def __init__(self, approx, beta=1.):
+
+    def __init__(self, approx, beta=1.0):
         super(KLqp, self).__init__(KL, approx, None, beta=beta)
 
 
 class ADVI(KLqp):
-    R"""**Automatic Differentiation Variational Inference (ADVI)**
+    r"""**Automatic Differentiation Variational Inference (ADVI)**
 
     This class implements the meanfield ADVI, where the variational
     posterior distribution is assumed to be spherical Gaussian without
@@ -446,7 +457,7 @@ def __init__(self, *args, **kwargs):
 
 
 class FullRankADVI(KLqp):
-    R"""**Full Rank Automatic Differentiation Variational Inference (ADVI)**
+    r"""**Full Rank Automatic Differentiation Variational Inference (ADVI)**
 
     Parameters
     ----------
@@ -491,17 +502,15 @@ class ImplicitGradient(Inference):
     only for large number of samples. Larger temperature is needed for small number of
     samples but there is no theoretical approach to choose the best one in such case.
     """
+
     def __init__(self, approx, estimator=KSD, kernel=test_functions.rbf, **kwargs):
         super(ImplicitGradient, self).__init__(
-            op=estimator,
-            approx=approx,
-            tf=kernel,
-            **kwargs
+            op=estimator, approx=approx, tf=kernel, **kwargs
         )
 
 
 class SVGD(ImplicitGradient):
-    R"""**Stein Variational Gradient Descent**
+    r"""**Stein Variational Gradient Descent**
 
     This inference is based on Kernelized Stein Discrepancy
     it's main idea is to move initial noisy particles so that
@@ -551,23 +560,33 @@ class SVGD(ImplicitGradient):
         arXiv:1704.02399
     """
 
-    def __init__(self, n_particles=100, jitter=1, model=None, start=None,
-                 random_seed=None, estimator=KSD, kernel=test_functions.rbf, **kwargs):
-        if kwargs.get('local_rv') is not None:
-            raise opvi.AEVBInferenceError('SVGD does not support local groups')
+    def __init__(
+        self,
+        n_particles=100,
+        jitter=1,
+        model=None,
+        start=None,
+        random_seed=None,
+        estimator=KSD,
+        kernel=test_functions.rbf,
+        **kwargs
+    ):
+        if kwargs.get("local_rv") is not None:
+            raise opvi.AEVBInferenceError("SVGD does not support local groups")
         empirical = Empirical(
-            size=n_particles, jitter=jitter,
-            start=start, model=model, random_seed=random_seed)
+            size=n_particles,
+            jitter=jitter,
+            start=start,
+            model=model,
+            random_seed=random_seed,
+        )
         super(SVGD, self).__init__(
-            approx=empirical,
-            estimator=estimator,
-            kernel=kernel,
-            **kwargs
+            approx=empirical, estimator=estimator, kernel=kernel, **kwargs
         )
 
 
 class ASVGD(ImplicitGradient):
-    R"""**Amortized Stein Variational Gradient Descent**
+    r"""**Amortized Stein Variational Gradient Descent**
 
     **not suggested to use**
 
@@ -612,38 +631,49 @@ class ASVGD(ImplicitGradient):
     """
 
     def __init__(self, approx=None, estimator=KSD, kernel=test_functions.rbf, **kwargs):
-        warnings.warn('You are using experimental inference Operator. '
-                      'It requires careful choice of temperature, default is 1. '
-                      'Default temperature works well for low dimensional problems and '
-                      'for significant `n_obj_mc`. Temperature > 1 gives more exploration '
-                      'power to algorithm, < 1 leads to undesirable results. Please take '
-                      'it in account when looking at inference result. Posterior variance '
-                      'is often **underestimated** when using temperature = 1.')
+        warnings.warn(
+            "You are using experimental inference Operator. "
+            "It requires careful choice of temperature, default is 1. "
+            "Default temperature works well for low dimensional problems and "
+            "for significant `n_obj_mc`. Temperature > 1 gives more exploration "
+            "power to algorithm, < 1 leads to undesirable results. Please take "
+            "it in account when looking at inference result. Posterior variance "
+            "is often **underestimated** when using temperature = 1."
+        )
         if approx is None:
             approx = FullRank(
-                model=kwargs.pop('model', None),
-                local_rv=kwargs.pop('local_rv', None)
+                model=kwargs.pop("model", None), local_rv=kwargs.pop("local_rv", None)
             )
         super(ASVGD, self).__init__(
-            estimator=estimator,
-            approx=approx,
-            kernel=kernel,
-            **kwargs
+            estimator=estimator, approx=approx, kernel=kernel, **kwargs
         )
 
-    def fit(self, n=10000, score=None, callbacks=None, progressbar=True,
-            obj_n_mc=500, **kwargs):
+    def fit(
+        self,
+        n=10000,
+        score=None,
+        callbacks=None,
+        progressbar=True,
+        obj_n_mc=500,
+        **kwargs
+    ):
         return super(ASVGD, self).fit(
-            n=n, score=score, callbacks=callbacks,
-            progressbar=progressbar, obj_n_mc=obj_n_mc, **kwargs)
+            n=n,
+            score=score,
+            callbacks=callbacks,
+            progressbar=progressbar,
+            obj_n_mc=obj_n_mc,
+            **kwargs
+        )
 
     def run_profiling(self, n=1000, score=None, obj_n_mc=500, **kwargs):
         return super(ASVGD, self).run_profiling(
-            n=n, score=score, obj_n_mc=obj_n_mc, **kwargs)
+            n=n, score=score, obj_n_mc=obj_n_mc, **kwargs
+        )
 
 
 class NFVI(KLqp):
-    R"""**Normalizing Flow based :class:`KLqp` inference**
+    r"""**Normalizing Flow based :class:`KLqp` inference**
 
     Normalizing flow is a series of invertible transformations on initial distribution.
 
@@ -697,9 +727,17 @@ def __init__(self, *args, **kwargs):
         super(NFVI, self).__init__(NormalizingFlow(*args, **kwargs))
 
 
-def fit(n=10000, local_rv=None, method='advi', model=None,
-        random_seed=None, start=None, inf_kwargs=None, **kwargs):
-    R"""Handy shortcut for using inference methods in functional way
+def fit(
+    n=10000,
+    local_rv=None,
+    method="advi",
+    model=None,
+    random_seed=None,
+    start=None,
+    inf_kwargs=None,
+    **kwargs
+):
+    r"""Handy shortcut for using inference methods in functional way
 
     Parameters
     ----------
@@ -767,42 +805,33 @@ def fit(n=10000, local_rv=None, method='advi', model=None,
     else:
         inf_kwargs = inf_kwargs.copy()
     if local_rv is not None:
-        inf_kwargs['local_rv'] = local_rv
+        inf_kwargs["local_rv"] = local_rv
     if random_seed is not None:
-        inf_kwargs['random_seed'] = random_seed
+        inf_kwargs["random_seed"] = random_seed
     if start is not None:
-        inf_kwargs['start'] = start
+        inf_kwargs["start"] = start
     if model is None:
         model = pm.modelcontext(model)
     _select = dict(
-        advi=ADVI,
-        fullrank_advi=FullRankADVI,
-        svgd=SVGD,
-        asvgd=ASVGD,
-        nfvi=NFVI
+        advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD, asvgd=ASVGD, nfvi=NFVI
     )
     if isinstance(method, str):
         method = method.lower()
-        if method.startswith('nfvi='):
+        if method.startswith("nfvi="):
             formula = method[5:]
-            inference = NFVI(
-                formula,
-                **inf_kwargs
-                )
+            inference = NFVI(formula, **inf_kwargs)
         elif method in _select:
 
-            inference = _select[method](
-                model=model,
-                **inf_kwargs
-            )
+            inference = _select[method](model=model, **inf_kwargs)
         else:
-            raise KeyError('method should be one of %s '
-                           'or Inference instance' %
-                           set(_select.keys()))
+            raise KeyError(
+                "method should be one of %s "
+                "or Inference instance" % set(_select.keys())
+            )
     elif isinstance(method, Inference):
         inference = method
     else:
-        raise TypeError('method should be one of %s '
-                        'or Inference instance' %
-                        set(_select.keys()))
+        raise TypeError(
+            "method should be one of %s " "or Inference instance" % set(_select.keys())
+        )
     return inference.fit(n, **kwargs)
diff --git a/pymc3/variational/operators.py b/pymc3/variational/operators.py
index dc1a9f1226..64cc023366 100644
--- a/pymc3/variational/operators.py
+++ b/pymc3/variational/operators.py
@@ -5,14 +5,11 @@
 from pymc3.variational.stein import Stein
 import pymc3 as pm
 
-__all__ = [
-    'KL',
-    'KSD'
-]
+__all__ = ["KL", "KSD"]
 
 
 class KL(Operator):
-    R"""**Operator based on Kullback Leibler Divergence**
+    r"""**Operator based on Kullback Leibler Divergence**
 
     This operator constructs Evidence Lower Bound (ELBO) objective
 
@@ -35,18 +32,19 @@ class KL(Operator):
         Beta parameter for KL divergence, scales the regularization term.
     """
 
-    def __init__(self, approx, beta=1.):
+    def __init__(self, approx, beta=1.0):
         Operator.__init__(self, approx)
         self.beta = pm.floatX(beta)
 
     def apply(self, f):
         return -self.datalogp_norm + self.beta * (self.logq_norm - self.varlogp_norm)
 
+
 # SVGD Implementation
 
 
 class KSDObjective(ObjectiveFunction):
-    R"""Helper class for construction loss and updates for variational inference
+    r"""Helper class for construction loss and updates for variational inference
 
     Parameters
     ----------
@@ -58,10 +56,10 @@ class KSDObjective(ObjectiveFunction):
 
     def __init__(self, op, tf):
         if not isinstance(op, KSD):
-            raise opvi.ParametrizationError('Op should be KSD')
+            raise opvi.ParametrizationError("Op should be KSD")
         ObjectiveFunction.__init__(self, op, tf)
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __call__(self, nmc, **kwargs):
         op = self.op  # type: KSD
         grad = op.apply(self.tf)
@@ -69,17 +67,19 @@ def __call__(self, nmc, **kwargs):
             z = self.approx.joint_histogram
         else:
             z = self.approx.symbolic_random
-        if 'more_obj_params' in kwargs:
-            params = self.obj_params + kwargs['more_obj_params']
+        if "more_obj_params" in kwargs:
+            params = self.obj_params + kwargs["more_obj_params"]
         else:
-            params = self.test_params + kwargs['more_tf_params']
+            params = self.test_params + kwargs["more_tf_params"]
             grad *= pm.floatX(-1)
         grads = tt.grad(None, params, known_grads={z: grad})
-        return self.approx.set_size_and_deterministic(grads, nmc, 0, kwargs.get('more_replacements'))
+        return self.approx.set_size_and_deterministic(
+            grads, nmc, 0, kwargs.get("more_replacements")
+        )
 
 
 class KSD(Operator):
-    R"""**Operator based on Kernelized Stein Discrepancy**
+    r"""**Operator based on Kernelized Stein Discrepancy**
 
     Input: A target distribution with density function :math:`p(x)`
         and a set of initial particles :math:`\{x^0_i\}^n_{i=1}`
@@ -120,5 +120,6 @@ def apply(self, f):
             approx=self.approx,
             kernel=f,
             use_histogram=self.approx.all_histograms,
-            temperature=self.temperature)
+            temperature=self.temperature,
+        )
         return pm.floatX(-1) * stein.grad
diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py
index 7b97c25e4d..da3738aad7 100644
--- a/pymc3/variational/opvi.py
+++ b/pymc3/variational/opvi.py
@@ -1,4 +1,4 @@
-R"""
+r"""
 Variational inference is a great approach for doing really complex,
 often intractable Bayesian inference in approximate form. Common methods
 (e.g. ADVI) lack from complexity so that approximate posterior does not
@@ -42,21 +42,13 @@
 import pymc3 as pm
 from pymc3.util import get_transformed
 from .updates import adagrad_window
-from ..blocking import (
-    ArrayOrdering, DictToArrayBijection, VarMap
-)
+from ..blocking import ArrayOrdering, DictToArrayBijection, VarMap
 from ..model import modelcontext
 from ..theanof import tt_rng, change_flags, identity
 from ..util import get_default_varnames
 from ..memoize import WithMemoization, memoize
 
-__all__ = [
-    'ObjectiveFunction',
-    'Operator',
-    'TestFunction',
-    'Group',
-    'Approximation'
-]
+__all__ = ["ObjectiveFunction", "Operator", "TestFunction", "Group", "Approximation"]
 
 
 class VariationalInferenceError(Exception):
@@ -96,7 +88,9 @@ def inner(*args, **kwargs):
             res = f(*args, **kwargs)
             res.name = name
             return res
+
         return inner
+
     return wrap
 
 
@@ -106,13 +100,19 @@ def node_property(f):
     if isinstance(f, str):
 
         def wrapper(fn):
-            return property(memoize(change_flags(compute_test_value='off')(append_name(f)(fn)), bound=True))
+            return property(
+                memoize(
+                    change_flags(compute_test_value="off")(append_name(f)(fn)),
+                    bound=True,
+                )
+            )
+
         return wrapper
     else:
-        return property(memoize(change_flags(compute_test_value='off')(f), bound=True))
+        return property(memoize(change_flags(compute_test_value="off")(f), bound=True))
 
 
-@change_flags(compute_test_value='ignore')
+@change_flags(compute_test_value="ignore")
 def try_to_set_test_value(node_in, node_out, s):
     _s = s
     if s is None:
@@ -123,8 +123,8 @@ def try_to_set_test_value(node_in, node_out, s):
     if not isinstance(node_out, (list, tuple)):
         node_out = [node_out]
     for i, o in zip(node_in, node_out):
-        if hasattr(i.tag, 'test_value'):
-            if not hasattr(s.tag, 'test_value'):
+        if hasattr(i.tag, "test_value"):
+            if not hasattr(s.tag, "test_value"):
                 continue
             else:
                 tv = i.tag.test_value[None, ...]
@@ -137,11 +137,12 @@ def try_to_set_test_value(node_in, node_out, s):
 class ObjectiveUpdates(theano.OrderedUpdates):
     """OrderedUpdates extension for storing loss
     """
+
     loss = None
 
 
 def _warn_not_used(smth, where):
-    warnings.warn('`%s` is not used for %s and ignored' % (smth, where))
+    warnings.warn("`%s` is not used for %s and ignored" % (smth, where))
 
 
 class ObjectiveFunction(object):
@@ -163,9 +164,18 @@ def __init__(self, op, tf):
     test_params = property(lambda self: self.tf.params)
     approx = property(lambda self: self.op.approx)
 
-    def updates(self, obj_n_mc=None, tf_n_mc=None, obj_optimizer=adagrad_window, test_optimizer=adagrad_window,
-                more_obj_params=None, more_tf_params=None, more_updates=None,
-                more_replacements=None, total_grad_norm_constraint=None):
+    def updates(
+        self,
+        obj_n_mc=None,
+        tf_n_mc=None,
+        obj_optimizer=adagrad_window,
+        test_optimizer=adagrad_window,
+        more_obj_params=None,
+        more_tf_params=None,
+        more_updates=None,
+        more_replacements=None,
+        total_grad_norm_constraint=None,
+    ):
         """Calculate gradients for objective function, test function and then
         constructs updates for optimization step
 
@@ -204,68 +214,90 @@ def updates(self, obj_n_mc=None, tf_n_mc=None, obj_optimizer=adagrad_window, tes
                 test_optimizer=test_optimizer,
                 more_tf_params=more_tf_params,
                 more_replacements=more_replacements,
-                total_grad_norm_constraint=total_grad_norm_constraint
+                total_grad_norm_constraint=total_grad_norm_constraint,
             )
         else:
             if tf_n_mc is not None:
-                _warn_not_used('tf_n_mc', self.op)
+                _warn_not_used("tf_n_mc", self.op)
             if more_tf_params:
-                _warn_not_used('more_tf_params', self.op)
+                _warn_not_used("more_tf_params", self.op)
         self.add_obj_updates(
             resulting_updates,
             obj_n_mc=obj_n_mc,
             obj_optimizer=obj_optimizer,
             more_obj_params=more_obj_params,
             more_replacements=more_replacements,
-            total_grad_norm_constraint=total_grad_norm_constraint
+            total_grad_norm_constraint=total_grad_norm_constraint,
         )
         resulting_updates.update(more_updates)
         return resulting_updates
 
-    def add_test_updates(self, updates, tf_n_mc=None, test_optimizer=adagrad_window,
-                         more_tf_params=None, more_replacements=None,
-                         total_grad_norm_constraint=None):
+    def add_test_updates(
+        self,
+        updates,
+        tf_n_mc=None,
+        test_optimizer=adagrad_window,
+        more_tf_params=None,
+        more_replacements=None,
+        total_grad_norm_constraint=None,
+    ):
         if more_tf_params is None:
             more_tf_params = []
         if more_replacements is None:
             more_replacements = dict()
-        tf_target = self(tf_n_mc, more_tf_params=more_tf_params, more_replacements=more_replacements)
-        grads = pm.updates.get_or_compute_grads(tf_target, self.obj_params + more_tf_params)
+        tf_target = self(
+            tf_n_mc, more_tf_params=more_tf_params, more_replacements=more_replacements
+        )
+        grads = pm.updates.get_or_compute_grads(
+            tf_target, self.obj_params + more_tf_params
+        )
         if total_grad_norm_constraint is not None:
             grads = pm.total_norm_constraint(grads, total_grad_norm_constraint)
-        updates.update(
-            test_optimizer(
-                grads,
-                self.test_params +
-                more_tf_params))
-
-    def add_obj_updates(self, updates, obj_n_mc=None, obj_optimizer=adagrad_window,
-                        more_obj_params=None, more_replacements=None,
-                        total_grad_norm_constraint=None):
+        updates.update(test_optimizer(grads, self.test_params + more_tf_params))
+
+    def add_obj_updates(
+        self,
+        updates,
+        obj_n_mc=None,
+        obj_optimizer=adagrad_window,
+        more_obj_params=None,
+        more_replacements=None,
+        total_grad_norm_constraint=None,
+    ):
         if more_obj_params is None:
             more_obj_params = []
         if more_replacements is None:
             more_replacements = dict()
-        obj_target = self(obj_n_mc, more_obj_params=more_obj_params, more_replacements=more_replacements)
-        grads = pm.updates.get_or_compute_grads(obj_target, self.obj_params + more_obj_params)
+        obj_target = self(
+            obj_n_mc,
+            more_obj_params=more_obj_params,
+            more_replacements=more_replacements,
+        )
+        grads = pm.updates.get_or_compute_grads(
+            obj_target, self.obj_params + more_obj_params
+        )
         if total_grad_norm_constraint is not None:
             grads = pm.total_norm_constraint(grads, total_grad_norm_constraint)
-        updates.update(
-            obj_optimizer(
-                grads,
-                self.obj_params +
-                more_obj_params))
+        updates.update(obj_optimizer(grads, self.obj_params + more_obj_params))
         if self.op.returns_loss:
             updates.loss = obj_target
 
-    @change_flags(compute_test_value='off')
-    def step_function(self, obj_n_mc=None, tf_n_mc=None,
-                      obj_optimizer=adagrad_window, test_optimizer=adagrad_window,
-                      more_obj_params=None, more_tf_params=None,
-                      more_updates=None, more_replacements=None,
-                      total_grad_norm_constraint=None,
-                      score=False, fn_kwargs=None):
-        R"""Step function that should be called on each optimization step.
+    @change_flags(compute_test_value="off")
+    def step_function(
+        self,
+        obj_n_mc=None,
+        tf_n_mc=None,
+        obj_optimizer=adagrad_window,
+        test_optimizer=adagrad_window,
+        more_obj_params=None,
+        more_tf_params=None,
+        more_updates=None,
+        more_replacements=None,
+        total_grad_norm_constraint=None,
+        score=False,
+        fn_kwargs=None,
+    ):
+        r"""Step function that should be called on each optimization step.
 
         Generally it solves the following problem:
 
@@ -305,25 +337,29 @@ def step_function(self, obj_n_mc=None, tf_n_mc=None,
         if fn_kwargs is None:
             fn_kwargs = {}
         if score and not self.op.returns_loss:
-            raise NotImplementedError('%s does not have loss' % self.op)
-        updates = self.updates(obj_n_mc=obj_n_mc, tf_n_mc=tf_n_mc,
-                               obj_optimizer=obj_optimizer,
-                               test_optimizer=test_optimizer,
-                               more_obj_params=more_obj_params,
-                               more_tf_params=more_tf_params,
-                               more_updates=more_updates,
-                               more_replacements=more_replacements,
-                               total_grad_norm_constraint=total_grad_norm_constraint)
+            raise NotImplementedError("%s does not have loss" % self.op)
+        updates = self.updates(
+            obj_n_mc=obj_n_mc,
+            tf_n_mc=tf_n_mc,
+            obj_optimizer=obj_optimizer,
+            test_optimizer=test_optimizer,
+            more_obj_params=more_obj_params,
+            more_tf_params=more_tf_params,
+            more_updates=more_updates,
+            more_replacements=more_replacements,
+            total_grad_norm_constraint=total_grad_norm_constraint,
+        )
         if score:
-            step_fn = theano.function(
-                [], updates.loss, updates=updates, **fn_kwargs)
+            step_fn = theano.function([], updates.loss, updates=updates, **fn_kwargs)
         else:
             step_fn = theano.function([], None, updates=updates, **fn_kwargs)
         return step_fn
 
-    @change_flags(compute_test_value='off')
-    def score_function(self, sc_n_mc=None, more_replacements=None, fn_kwargs=None):   # pragma: no cover
-        R"""Compile scoring function that operates which takes no inputs and returns Loss
+    @change_flags(compute_test_value="off")
+    def score_function(
+        self, sc_n_mc=None, more_replacements=None, fn_kwargs=None
+    ):  # pragma: no cover
+        r"""Compile scoring function that operates which takes no inputs and returns Loss
 
         Parameters
         ----------
@@ -341,25 +377,27 @@ def score_function(self, sc_n_mc=None, more_replacements=None, fn_kwargs=None):
         if fn_kwargs is None:
             fn_kwargs = {}
         if not self.op.returns_loss:
-            raise NotImplementedError('%s does not have loss' % self.op)
+            raise NotImplementedError("%s does not have loss" % self.op)
         if more_replacements is None:
             more_replacements = {}
         loss = self(sc_n_mc, more_replacements=more_replacements)
         return theano.function([], loss, **fn_kwargs)
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __call__(self, nmc, **kwargs):
-        if 'more_tf_params' in kwargs:
-            m = -1.
+        if "more_tf_params" in kwargs:
+            m = -1.0
         else:
-            m = 1.
+            m = 1.0
         a = self.op.apply(self.tf)
-        a = self.approx.set_size_and_deterministic(a, nmc, 0, kwargs.get('more_replacements'))
+        a = self.approx.set_size_and_deterministic(
+            a, nmc, 0, kwargs.get("more_replacements")
+        )
         return m * self.op.T(a)
 
 
 class Operator(object):
-    R"""**Base class for Operator**
+    r"""**Base class for Operator**
 
     Parameters
     ----------
@@ -381,11 +419,14 @@ class Operator(object):
     def __init__(self, approx):
         self.approx = approx
         if not self.supports_aevb and approx.has_local:
-            raise AEVBInferenceError('%s does not support AEVB, '
-                                     'please change inference method' % self)
+            raise AEVBInferenceError(
+                "%s does not support AEVB, please change inference method" % self
+            )
         if self.require_logq and not approx.has_logq:
-            raise ExplicitInferenceError('%s requires logq, but %s does not implement it'
-                                         'please change inference method' % (self, approx))
+            raise ExplicitInferenceError(
+                "%s requires logq, but %s does not implement it, "
+                "please change inference method" % (self, approx)
+            )
 
     inputs = property(lambda self: self.approx.inputs)
     logp = property(lambda self: self.approx.logp)
@@ -398,8 +439,8 @@ def __init__(self, approx):
     logq_norm = property(lambda self: self.approx.logq_norm)
     model = property(lambda self: self.approx.model)
 
-    def apply(self, f):   # pragma: no cover
-        R"""Operator itself
+    def apply(self, f):  # pragma: no cover
+        r"""Operator itself
 
         .. math::
 
@@ -421,24 +462,25 @@ def apply(self, f):   # pragma: no cover
     def __call__(self, f=None):
         if self.has_test_function:
             if f is None:
-                raise ParametrizationError('Operator %s requires TestFunction' % self)
+                raise ParametrizationError("Operator %s requires TestFunction" % self)
             else:
                 if not isinstance(f, TestFunction):
                     f = TestFunction.from_function(f)
         else:
             if f is not None:
                 warnings.warn(
-                    'TestFunction for %s is redundant and removed' %
-                    self, stacklevel=3)
+                    "TestFunction for %s is redundant and removed" % self, stacklevel=3
+                )
             else:
                 pass
             f = TestFunction()
         f.setup(self.approx)
         return self.objective_class(self, f)
 
-    def __str__(self):    # pragma: no cover
-        return '%(op)s[%(ap)s]' % dict(op=self.__class__.__name__,
-                                       ap=self.approx.__class__.__name__)
+    def __str__(self):  # pragma: no cover
+        return "%(op)s[%(ap)s]" % dict(
+            op=self.__class__.__name__, ap=self.approx.__class__.__name__
+        )
 
 
 def collect_shared_to_list(params):
@@ -455,14 +497,16 @@ def collect_shared_to_list(params):
     """
     if isinstance(params, dict):
         return list(
-            t[1] for t in sorted(params.items(), key=lambda t: t[0])
+            t[1]
+            for t in sorted(params.items(), key=lambda t: t[0])
             if isinstance(t[1], theano.compile.SharedVariable)
         )
     elif params is None:
         return []
     else:
-        raise TypeError(
-            'Unknown type %s for %r, need dict or None')
+        raise TypeError(
+            "Unknown type %s for %r, need dict or None" % (type(params), params)
+        )
 
 
 class TestFunction(object):
@@ -483,14 +525,14 @@ def setup(self, approx):
     @classmethod
     def from_function(cls, f):
         if not callable(f):
-            raise ParametrizationError('Need callable, got %r' % f)
+            raise ParametrizationError("Need callable, got %r" % f)
         obj = TestFunction()
         obj.__call__ = f
         return obj
 
 
 class Group(WithMemoization):
-    R"""**Base class for grouping variables in VI**
+    r"""**Base class for grouping variables in VI**
 
     Grouped Approximation is used for modelling mutual dependencies
     for a specified group of variables. Base for local and global group.
@@ -713,24 +755,26 @@ class Group(WithMemoization):
     has_logq = True
 
     # some important defaults
-    initial_dist_name = 'normal'
-    initial_dist_map = 0.
+    initial_dist_name = "normal"
+    initial_dist_map = 0.0
 
     # for handy access using class methods
     __param_spec__ = dict()
-    short_name = ''
+    short_name = ""
     alias_names = frozenset()
     __param_registry = dict()
     __name_registry = dict()
 
     @classmethod
     def register(cls, sbcls):
-        assert frozenset(sbcls.__param_spec__) not in cls.__param_registry, 'Duplicate __param_spec__'
+        assert (
+            frozenset(sbcls.__param_spec__) not in cls.__param_registry
+        ), "Duplicate __param_spec__"
         cls.__param_registry[frozenset(sbcls.__param_spec__)] = sbcls
-        assert sbcls.short_name not in cls.__name_registry, 'Duplicate short_name'
+        assert sbcls.short_name not in cls.__name_registry, "Duplicate short_name"
         cls.__name_registry[sbcls.short_name] = sbcls
         for alias in sbcls.alias_names:
-            assert alias not in cls.__name_registry, 'Duplicate alias_name'
+            assert alias not in cls.__name_registry, "Duplicate alias_name"
             cls.__name_registry[alias] = sbcls
         return sbcls
 
@@ -739,9 +783,12 @@ def group_for_params(cls, params):
         if pm.variational.flows.seems_like_flow_params(params):
             return pm.variational.approximations.NormalizingFlowGroup
         if frozenset(params) not in cls.__param_registry:
-            raise KeyError('No such group for the following params: {!r}, '
-                           'only the following are supported\n\n{}'
-                           .format(params, cls.__param_registry))
+            raise KeyError(
+                "No such group for the following params: {!r}, "
+                "only the following are supported\n\n{}".format(
+                    params, cls.__param_registry
+                )
+            )
         return cls.__param_registry[frozenset(params)]
 
     @classmethod
@@ -749,37 +796,49 @@ def group_for_short_name(cls, name):
         if pm.variational.flows.seems_like_formula(name):
             return pm.variational.approximations.NormalizingFlowGroup
         if name.lower() not in cls.__name_registry:
-            raise KeyError('No such group: {!r}, '
-                           'only the following are supported\n\n{}'
-                           .format(name, cls.__name_registry))
+            raise KeyError(
+                "No such group: {!r}, "
+                "only the following are supported\n\n{}".format(
+                    name, cls.__name_registry
+                )
+            )
         return cls.__name_registry[name.lower()]
 
     def __new__(cls, group=None, vfam=None, params=None, *args, **kwargs):
         if cls is Group:
             if vfam is not None and params is not None:
-                raise TypeError('Cannot call Group with both `vfam` and `params` provided')
+                raise TypeError(
+                    "Cannot call Group with both `vfam` and `params` provided"
+                )
             elif vfam is not None:
                 return super(Group, cls).__new__(cls.group_for_short_name(vfam))
             elif params is not None:
                 return super(Group, cls).__new__(cls.group_for_params(params))
             else:
-                raise TypeError('Need to call Group with either `vfam` or `params` provided')
+                raise TypeError(
+                    "Need to call Group with either `vfam` or `params` provided"
+                )
         else:
             return super(Group, cls).__new__(cls)
 
-    def __init__(self, group,
-                 vfam=None,
-                 params=None,
-                 random_seed=None,
-                 model=None,
-                 local=False,
-                 rowwise=False,
-                 options=None,
-                 **kwargs):
+    def __init__(
+        self,
+        group,
+        vfam=None,
+        params=None,
+        random_seed=None,
+        model=None,
+        local=False,
+        rowwise=False,
+        options=None,
+        **kwargs
+    ):
         if local and not self.supports_batched:
-            raise LocalGroupError('%s does not support local groups' % self.__class__)
+            raise LocalGroupError("%s does not support local groups" % self.__class__)
         if local and rowwise:
-            raise LocalGroupError('%s does not support local grouping in rowwise mode')
+            raise LocalGroupError(
+                "%s does not support local grouping in rowwise mode" % self.__class__
+            )
         if isinstance(vfam, str):
             vfam = vfam.lower()
         if options is None:
@@ -808,7 +865,7 @@ def get_param_spec_for(cls, **kwargs):
         return res
 
     def _check_user_params(self, **kwargs):
-        R"""*Dev* - checks user params, allocates them if they are correct, returns True.
+        r"""*Dev* - checks user params, allocates them if they are correct, returns True.
         If they are not present, returns False
 
         Parameters
@@ -823,27 +880,29 @@ def _check_user_params(self, **kwargs):
         if user_params is None:
             return False
         if not isinstance(user_params, dict):
-            raise TypeError('params should be a dict')
+            raise TypeError("params should be a dict")
         givens = set(user_params.keys())
         needed = set(self.__param_spec__)
         if givens != needed:
             raise ParametrizationError(
-                'Passed parameters do not have a needed set of keys, '
-                'they should be equal, got {givens}, needed {needed}'.format(
-                    givens=givens, needed=needed))
+                "Passed parameters do not have a needed set of keys, "
+                "they should be equal, got {givens}, needed {needed}".format(
+                    givens=givens, needed=needed
+                )
+            )
         self._user_params = dict()
-        spec = self.get_param_spec_for(d=self.ddim, **kwargs.pop('spec_kw', {}))
+        spec = self.get_param_spec_for(d=self.ddim, **kwargs.pop("spec_kw", {}))
         for name, param in self.user_params.items():
             shape = spec[name]
             if self.local:
-                shape = (-1, ) + shape
+                shape = (-1,) + shape
             elif self.batched:
-                shape = (self.bdim, ) + shape
+                shape = (self.bdim,) + shape
             self._user_params[name] = tt.as_tensor(param).reshape(shape)
         return True
 
     def _initial_type(self, name):
-        R"""*Dev* - initial type with given name. The correct type depends on `self.batched`
+        r"""*Dev* - initial type with given name. The correct type depends on `self.batched`
 
         Parameters
         ----------
@@ -859,7 +918,7 @@ def _initial_type(self, name):
             return tt.matrix(name)
 
     def _input_type(self, name):
-        R"""*Dev* - input type with given name. The correct type depends on `self.batched`
+        r"""*Dev* - input type with given name. The correct type depends on `self.batched`
 
         Parameters
         ----------
@@ -874,24 +933,26 @@ def _input_type(self, name):
         else:
             return tt.vector(name)
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def __init_group__(self, group):
         if not group:
-            raise GroupError('Got empty group')
+            raise GroupError("Got empty group")
         if self.group is None:
             # delayed init
             self.group = group
         if self.batched and len(group) > 1:
             if self.local:  # better error message
-                raise LocalGroupError('Local groups with more than 1 variable are not supported')
+                raise LocalGroupError(
+                    "Local groups with more than 1 variable are not supported"
+                )
             else:
-                raise BatchedGroupError('Batched groups with more than 1 variable are not supported')
+                raise BatchedGroupError(
+                    "Batched groups with more than 1 variable are not supported"
+                )
         self.symbolic_initial = self._initial_type(
-            self.__class__.__name__ + '_symbolic_initial_tensor'
-        )
-        self.input = self._input_type(
-            self.__class__.__name__ + '_symbolic_input'
+            self.__class__.__name__ + "_symbolic_initial_tensor"
         )
+        self.input = self._input_type(self.__class__.__name__ + "_symbolic_input")
         # I do some staff that is not supported by standard __init__
         # so I have to to it by myself
         self.ordering = ArrayOrdering([])
@@ -899,18 +960,19 @@ def __init_group__(self, group):
         self.group = [get_transformed(var) for var in self.group]
         for var in self.group:
             if isinstance(var.distribution, pm.Discrete):
-                raise ParametrizationError('Discrete variables are not supported by VI: {}'
-                                           .format(var))
+                raise ParametrizationError(
+                    "Discrete variables are not supported by VI: {}".format(var)
+                )
             begin = self.ddim
             if self.batched:
                 if var.ndim < 1:
                     if self.local:
-                        raise LocalGroupError('Local variable should not be scalar')
+                        raise LocalGroupError("Local variable should not be scalar")
                     else:
-                        raise BatchedGroupError('Batched variable should not be scalar')
+                        raise BatchedGroupError("Batched variable should not be scalar")
                 self.ordering.size += (np.prod(var.dshape[1:])).astype(int)
                 if self.local:
-                    shape = (-1, ) + var.dshape[1:]
+                    shape = (-1,) + var.dshape[1:]
                 else:
                     shape = var.dshape
             else:
@@ -921,7 +983,7 @@ def __init_group__(self, group):
             self.ordering.vmap.append(vmap)
             self.ordering.by_name[vmap.var] = vmap
             vr = self.input[..., vmap.slc].reshape(shape).astype(vmap.dtyp)
-            vr.name = vmap.var + '_vi_replacement'
+            vr.name = vmap.var + "_vi_replacement"
             self.replacements[var] = vr
         self.bij = DictToArrayBijection(self.ordering, {})
 
@@ -1022,7 +1084,7 @@ def _new_initial(self, size, deterministic, more_replacements=None):
         dim, dist_name, dist_map = (
             self.ddim,
             self.initial_dist_name,
-            self.initial_dist_map
+            self.initial_dist_map,
         )
         dtype = self.symbolic_initial.dtype
         dim = tt.as_tensor(dim)
@@ -1036,11 +1098,7 @@ def _new_initial(self, size, deterministic, more_replacements=None):
                 return getattr(self._rng, dist_name)(shape)
         else:
             sample = getattr(self._rng, dist_name)(shape)
-            initial = tt.switch(
-                deterministic,
-                tt.ones(shape, dtype) * dist_map,
-                sample
-            )
+            initial = tt.switch(deterministic, tt.ones(shape, dtype) * dist_map, sample)
             return initial
 
     @node_property
@@ -1065,7 +1123,7 @@ def symbolic_random2d(self):
         else:
             return self.symbolic_random
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def set_size_and_deterministic(self, node, s, d, more_replacements=None):
         """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or
         :func:`symbolic_single_sample` new random generator can be allocated and applied to node
@@ -1085,7 +1143,9 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None):
         -------
         :class:`Variable` with applied replacements, ready to use
         """
-        flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements)
+        flat2rand = self.make_size_and_deterministic_replacements(
+            s, d, more_replacements
+        )
         node_out = theano.clone(node, flat2rand)
         try_to_set_test_value(node, node_out, s)
         return node_out
@@ -1106,8 +1166,7 @@ def symbolic_sample_over_posterior(self, node):
         def sample(post):
             return theano.clone(node, {self.input: post})
 
-        nodes, _ = theano.scan(
-            sample, random)
+        nodes, _ = theano.scan(sample, random)
         return nodes
 
     def symbolic_single_sample(self, node):
@@ -1118,9 +1177,7 @@ def symbolic_single_sample(self, node):
         node = self.to_flat_input(node)
         random = self.symbolic_random.astype(self.symbolic_initial.dtype)
         random = tt.patternbroadcast(random, self.symbolic_initial.broadcastable)
-        return theano.clone(
-            node, {self.input: random[0]}
-        )
+        return theano.clone(node, {self.input: random[0]})
 
     def make_size_and_deterministic_replacements(self, s, d, more_replacements=None):
         """*Dev* - creates correct replacements for initial depending on
@@ -1148,8 +1205,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None)
     @node_property
     def symbolic_normalizing_constant(self):
         """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`"""
-        t = self.to_flat_input(
-            tt.max([v.scaling for v in self.group]))
+        t = self.to_flat_input(tt.max([v.scaling for v in self.group]))
         t = self.symbolic_single_sample(t)
         return pm.floatX(t)
 
@@ -1185,14 +1241,14 @@ def logq_norm(self):
 
     def __str__(self):
         if self.group is None:
-            shp = 'undefined'
+            shp = "undefined"
         else:
             shp = str(self.ddim)
             if self.local:
-                shp = 'None, ' + shp
+                shp = "None, " + shp
             elif self.batched:
-                shp = str(self.bdim) + ', ' + shp
-        return '{cls}[{shp}]'.format(shp=shp, cls=self.__class__.__name__)
+                shp = str(self.bdim) + ", " + shp
+        return "{cls}[{shp}]".format(shp=shp, cls=self.__class__.__name__)
 
     @node_property
     def std(self):
@@ -1245,25 +1301,26 @@ def __init__(self, groups, model=None):
         self._scale_cost_to_minibatch = theano.shared(np.int8(1))
         model = modelcontext(model)
         if not model.free_RVs:
-            raise TypeError('Model does not have FreeRVs')
+            raise TypeError("Model does not have FreeRVs")
         self.groups = list()
         seen = set()
         rest = None
         for g in groups:
             if g.group is None:
                 if rest is not None:
-                    raise GroupError('More than one group is specified for '
-                                     'the rest variables')
+                    raise GroupError(
+                        "More than one group is specified for the rest variables"
+                    )
                 else:
                     rest = g
             else:
                 if set(g.group) & seen:
-                    raise GroupError('Found duplicates in groups')
+                    raise GroupError("Found duplicates in groups")
                 seen.update(g.group)
                 self.groups.append(g)
         if set(model.free_RVs) - seen:
             if rest is None:
-                raise GroupError('No approximation is specified for the rest variables')
+                raise GroupError("No approximation is specified for the rest variables")
             else:
                 rest.__init_group__(list(set(model.free_RVs) - seen))
                 self.groups.append(rest)
@@ -1271,22 +1328,25 @@ def __init__(self, groups, model=None):
 
     @property
     def has_logq(self):
-        return all(self.collect('has_logq'))
+        return all(self.collect("has_logq"))
 
-    def collect(self, item, part='total'):
-        if part == 'total':
+    def collect(self, item, part="total"):
+        if part == "total":
             return [getattr(g, item) for g in self.groups]
-        elif part == 'local':
+        elif part == "local":
             return [getattr(g, item) for g in self.groups if g.local]
-        elif part == 'global':
+        elif part == "global":
             return [getattr(g, item) for g in self.groups if not g.local]
-        elif part == 'batched':
+        elif part == "batched":
             return [getattr(g, item) for g in self.groups if g.batched]
         else:
-            raise ValueError("unknown part %s, expected {'local', 'global', 'total', 'batched'}")
+            raise ValueError(
+                "unknown part %s, expected {'local', 'global', 'total', 'batched'}"
+                % part
+            )
 
-    inputs = property(lambda self: self.collect('input'))
-    symbolic_randoms = property(lambda self: self.collect('symbolic_random'))
+    inputs = property(lambda self: self.collect("input"))
+    symbolic_randoms = property(lambda self: self.collect("symbolic_random"))
 
     @property
     def scale_cost_to_minibatch(self):
@@ -1303,22 +1362,21 @@ def symbolic_normalizing_constant(self):
         Here the effect is controlled by `self.scale_cost_to_minibatch`
         """
         t = tt.max(
-            self.collect('symbolic_normalizing_constant') + [
-                var.scaling for var in self.model.observed_RVs
-            ])
-        t = tt.switch(self._scale_cost_to_minibatch, t,
-                      tt.constant(1, dtype=t.dtype))
+            self.collect("symbolic_normalizing_constant")
+            + [var.scaling for var in self.model.observed_RVs]
+        )
+        t = tt.switch(self._scale_cost_to_minibatch, t, tt.constant(1, dtype=t.dtype))
         return pm.floatX(t)
 
     @node_property
     def symbolic_logq(self):
         """*Dev* - collects `symbolic_logq` for all groups"""
-        return tt.add(*self.collect('symbolic_logq'))
+        return tt.add(*self.collect("symbolic_logq"))
 
     @node_property
     def logq(self):
         """*Dev* - collects `logQ` for all groups"""
-        return tt.add(*self.collect('logq'))
+        return tt.add(*self.collect("logq"))
 
     @node_property
     def logq_norm(self):
@@ -1329,7 +1387,8 @@ def logq_norm(self):
     def _sized_symbolic_varlogp_and_datalogp(self):
         """*Dev* - computes sampled prior term from model via `theano.scan`"""
         varlogp_s, datalogp_s = self.symbolic_sample_over_posterior(
-            [self.model.varlogpt, self.model.datalogpt])
+            [self.model.varlogpt, self.model.datalogpt]
+        )
         return varlogp_s, datalogp_s  # both shape (s,)
 
     @node_property
@@ -1366,7 +1425,8 @@ def datalogp(self):
     def _single_symbolic_varlogp_and_datalogp(self):
         """*Dev* - computes sampled prior term from model via `theano.scan`"""
         varlogp, datalogp = self.symbolic_single_sample(
-            [self.model.varlogpt, self.model.datalogpt])
+            [self.model.varlogpt, self.model.datalogpt]
+        )
         return varlogp, datalogp
 
     @node_property
@@ -1405,9 +1465,9 @@ def datalogp_norm(self):
     @property
     def replacements(self):
         """*Dev* - all replacements from groups to replace PyMC random variables with approximation"""
-        return collections.OrderedDict(itertools.chain.from_iterable(
-            g.replacements.items() for g in self.groups
-        ))
+        return collections.OrderedDict(
+            itertools.chain.from_iterable(g.replacements.items() for g in self.groups)
+        )
 
     def make_size_and_deterministic_replacements(self, s, d, more_replacements=None):
         """*Dev* - creates correct replacements for initial depending on
@@ -1430,11 +1490,13 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None)
             more_replacements = {}
         flat2rand = collections.OrderedDict()
         for g in self.groups:
-            flat2rand.update(g.make_size_and_deterministic_replacements(s, d, more_replacements))
+            flat2rand.update(
+                g.make_size_and_deterministic_replacements(s, d, more_replacements)
+            )
         flat2rand.update(more_replacements)
         return flat2rand
 
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def set_size_and_deterministic(self, node, s, d, more_replacements=None):
         """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or
         :func:`symbolic_single_sample` new random generator can be allocated and applied to node
@@ -1456,7 +1518,9 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None):
         """
         _node = node
         optimizations = self.get_optimization_replacements(s, d)
-        flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements)
+        flat2rand = self.make_size_and_deterministic_replacements(
+            s, d, more_replacements
+        )
         node = theano.clone(node, optimizations)
         node = theano.clone(node, flat2rand)
         try_to_set_test_value(_node, node, s)
@@ -1476,8 +1540,7 @@ def symbolic_sample_over_posterior(self, node):
         def sample(*post):
             return theano.clone(node, dict(zip(self.inputs, post)))
 
-        nodes, _ = theano.scan(
-            sample, self.symbolic_randoms)
+        nodes, _ = theano.scan(sample, self.symbolic_randoms)
         return nodes
 
     def symbolic_single_sample(self, node):
@@ -1488,9 +1551,7 @@ def symbolic_single_sample(self, node):
         node = self.to_flat_input(node)
         post = [v[0] for v in self.symbolic_randoms]
         inp = self.inputs
-        return theano.clone(
-            node, dict(zip(inp, post))
-        )
+        return theano.clone(node, dict(zip(inp, post)))
 
     def get_optimization_replacements(self, s, d):
         """*Dev* - optimizations for logP. If sample size is static and equal to 1:
@@ -1503,10 +1564,8 @@ def get_optimization_replacements(self, s, d):
             repl[self.datalogp] = self.single_symbolic_datalogp
         return repl
 
-    @change_flags(compute_test_value='off')
-    def sample_node(self, node, size=None,
-                    deterministic=False,
-                    more_replacements=None):
+    @change_flags(compute_test_value="off")
+    def sample_node(self, node, size=None, deterministic=False, more_replacements=None):
         """Samples given node or nodes over shared posterior
 
         Parameters
@@ -1530,7 +1589,9 @@ def sample_node(self, node, size=None,
             node_out = self.symbolic_single_sample(node)
         else:
             node_out = self.symbolic_sample_over_posterior(node)
-        node_out = self.set_size_and_deterministic(node_out, size, deterministic, more_replacements)
+        node_out = self.set_size_and_deterministic(
+            node_out, size, deterministic, more_replacements
+        )
         try_to_set_test_value(node_in, node_out, size)
         return node_out
 
@@ -1538,24 +1599,27 @@ def rslice(self, name):
         """*Dev* - vectorized sampling for named random variable without call to `theano.scan`.
         This node still needs :func:`set_size_and_deterministic` to be evaluated
         """
+
         def vars_names(vs):
             return {v.name for v in vs}
+
         for vars_, random, ordering in zip(
-                self.collect('group'),
-                self.symbolic_randoms,
-                self.collect('ordering')):
+            self.collect("group"), self.symbolic_randoms, self.collect("ordering")
+        ):
             if name in vars_names(vars_):
                 name_, slc, shape, dtype = ordering[name]
-                found = random[..., slc].reshape((random.shape[0], ) + shape).astype(dtype)
-                found.name = name + '_vi_random_slice'
+                found = (
+                    random[..., slc].reshape((random.shape[0],) + shape).astype(dtype)
+                )
+                found.name = name + "_vi_random_slice"
                 break
         else:
-            raise KeyError('%r not found' % name)
+            raise KeyError("%r not found" % name)
         return found
 
     @property
     @memoize(bound=True)
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def sample_dict_fn(self):
         s = tt.iscalar()
         names = [v.name for v in self.model.free_RVs]
@@ -1565,7 +1629,9 @@ def sample_dict_fn(self):
 
         def inner(draws=100):
             _samples = sample_fn(draws)
-            return dict([(v_.name, s_) for v_, s_ in zip(self.model.free_RVs, _samples)])
+            return dict(
+                [(v_.name, s_) for v_, s_ in zip(self.model.free_RVs, _samples)]
+            )
 
         return inner
 
@@ -1584,13 +1650,19 @@ def sample(self, draws=500, include_transformed=True):
         trace : :class:`pymc3.backends.base.MultiTrace`
             Samples drawn from variational posterior.
         """
-        vars_sampled = get_default_varnames(self.model.unobserved_RVs,
-                                            include_transformed=include_transformed)
+        vars_sampled = get_default_varnames(
+            self.model.unobserved_RVs, include_transformed=include_transformed
+        )
         samples = self.sample_dict_fn(draws)  # type: dict
-        points = ({name: records[i] for name, records in samples.items()} for i in range(draws))
-        trace = pm.sampling.NDArray(model=self.model, vars=vars_sampled, test_point={
-            name: records[0] for name, records in samples.items()
-        })
+        points = (
+            {name: records[i] for name, records in samples.items()}
+            for i in range(draws)
+        )
+        trace = pm.sampling.NDArray(
+            model=self.model,
+            vars=vars_sampled,
+            test_point={name: records[0] for name, records in samples.items()},
+        )
         try:
             trace.setup(draws=draws, chain=0)
             for point in points:
@@ -1601,34 +1673,34 @@ def sample(self, draws=500, include_transformed=True):
 
     @property
     def ndim(self):
-        return sum(self.collect('ndim'))
+        return sum(self.collect("ndim"))
 
     @property
     def ddim(self):
-        return sum(self.collect('ddim'))
+        return sum(self.collect("ddim"))
 
     @property
     def has_local(self):
-        return any(self.collect('local'))
+        return any(self.collect("local"))
 
     @property
     def has_global(self):
-        return any(not c for c in self.collect('local'))
+        return any(not c for c in self.collect("local"))
 
     @property
     def has_batched(self):
-        return any(not c for c in self.collect('batched'))
+        return any(not c for c in self.collect("batched"))
 
     @node_property
     def symbolic_random(self):
-        return tt.concatenate(self.collect('symbolic_random2d'), axis=-1)
+        return tt.concatenate(self.collect("symbolic_random2d"), axis=-1)
 
     def __str__(self):
         if len(self.groups) < 5:
-            return 'Approximation{' + ' & '.join(map(str, self.groups)) + '}'
+            return "Approximation{" + " & ".join(map(str, self.groups)) + "}"
         else:
-            forprint = self.groups[:2] + ['...'] + self.groups[-2:]
-            return 'Approximation{' + ' & '.join(map(str, forprint)) + '}'
+            forprint = self.groups[:2] + ["..."] + self.groups[-2:]
+            return "Approximation{" + " & ".join(map(str, forprint)) + "}"
 
     @property
     def all_histograms(self):
@@ -1641,9 +1713,11 @@ def any_histograms(self):
     @node_property
     def joint_histogram(self):
         if not self.all_histograms:
-            raise VariationalInferenceError('%s does not consist of all Empirical approximations')
-        return tt.concatenate(self.collect('histogram'), axis=-1)
+            raise VariationalInferenceError(
+                "%s does not consist of all Empirical approximations"
+            )
+        return tt.concatenate(self.collect("histogram"), axis=-1)
 
     @property
     def params(self):
-        return sum(self.collect('params'), [])
+        return sum(self.collect("params"), [])
diff --git a/pymc3/variational/stein.py b/pymc3/variational/stein.py
index cc3b0fccb2..543ab71843 100644
--- a/pymc3/variational/stein.py
+++ b/pymc3/variational/stein.py
@@ -4,9 +4,7 @@
 from pymc3.theanof import floatX, change_flags
 from pymc3.memoize import WithMemoization, memoize
 
-__all__ = [
-    'Stein'
-]
+__all__ = ["Stein"]
 
 
 class Stein(WithMemoization):
@@ -26,27 +24,24 @@ def input_joint_matrix(self):
     @node_property
     def approx_symbolic_matrices(self):
         if self.use_histogram:
-            return self.approx.collect('histogram')
+            return self.approx.collect("histogram")
         else:
             return self.approx.symbolic_randoms
 
     @node_property
     def dlogp(self):
-        grad = tt.grad(
-            self.logp_norm.sum(),
-            self.approx_symbolic_matrices
-        )
+        grad = tt.grad(self.logp_norm.sum(), self.approx_symbolic_matrices)
 
         def flatten2(tensor):
             return tensor.flatten(2)
+
         return tt.concatenate(list(map(flatten2, grad)), -1)
 
     @node_property
     def grad(self):
         n = floatX(self.input_joint_matrix.shape[0])
         temperature = self.temperature
-        svgd_grad = (self.density_part_grad / temperature +
-                     self.repulsive_part_grad)
+        svgd_grad = self.density_part_grad / temperature + self.repulsive_part_grad
         return svgd_grad / n
 
     @node_property
@@ -75,11 +70,13 @@ def logp_norm(self):
         if self.use_histogram:
             sized_symbolic_logp = theano.clone(
                 sized_symbolic_logp,
-                dict(zip(self.approx.symbolic_randoms, self.approx.collect('histogram')))
+                dict(
+                    zip(self.approx.symbolic_randoms, self.approx.collect("histogram"))
+                ),
             )
         return sized_symbolic_logp / self.approx.symbolic_normalizing_constant
 
     @memoize
-    @change_flags(compute_test_value='off')
+    @change_flags(compute_test_value="off")
     def _kernel(self):
         return self._kernel_f(self.input_joint_matrix)
diff --git a/pymc3/variational/test_functions.py b/pymc3/variational/test_functions.py
index 6f6b919ff0..fa99bac71a 100644
--- a/pymc3/variational/test_functions.py
+++ b/pymc3/variational/test_functions.py
@@ -2,9 +2,7 @@
 from .opvi import TestFunction
 from pymc3.theanof import floatX
 
-__all__ = [
-    'rbf'
-]
+__all__ = ["rbf"]
 
 
 class Kernel(TestFunction):
@@ -21,20 +19,22 @@ class Kernel(TestFunction):
 class RBF(Kernel):
     def __call__(self, X):
         XY = X.dot(X.T)
-        x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, 'x')
+        x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, "x")
         X2e = tt.repeat(x2, X.shape[0], axis=1)
-        H = X2e + X2e.T - 2. * XY
+        H = X2e + X2e.T - 2.0 * XY
 
         V = tt.sort(H.flatten())
         length = V.shape[0]
         # median distance
-        m = tt.switch(tt.eq((length % 2), 0),
-                      # if even vector
-                      tt.mean(V[((length // 2) - 1):((length // 2) + 1)]),
-                      # if odd vector
-                      V[length // 2])
-
-        h = .5 * m / tt.log(floatX(H.shape[0]) + floatX(1))
+        m = tt.switch(
+            tt.eq((length % 2), 0),
+            # if even vector
+            tt.mean(V[((length // 2) - 1) : ((length // 2) + 1)]),
+            # if odd vector
+            V[length // 2],
+        )
+
+        h = 0.5 * m / tt.log(floatX(H.shape[0]) + floatX(1))
 
         #  RBF
         Kxy = tt.exp(-H / h / 2.0)
diff --git a/pymc3/variational/updates.py b/pymc3/variational/updates.py
index d9b4319f5c..ae9cbf1fa8 100755
--- a/pymc3/variational/updates.py
+++ b/pymc3/variational/updates.py
@@ -148,13 +148,17 @@ def get_or_compute_grads(loss_or_grads, params):
         compute its gradient, we can never update it and want to fail early).
     """
     if any(not isinstance(p, theano.compile.SharedVariable) for p in params):
-        raise ValueError("params must contain shared variables only. If it "
-                         "contains arbitrary parameter expressions, then "
-                         "lasagne.utils.collect_shared_vars() may help you.")
+        raise ValueError(
+            "params must contain shared variables only. If it "
+            "contains arbitrary parameter expressions, then "
+            "lasagne.utils.collect_shared_vars() may help you."
+        )
     if isinstance(loss_or_grads, list):
         if not len(loss_or_grads) == len(params):
-            raise ValueError("Got %d gradient expressions for %d parameters" %
-                             (len(loss_or_grads), len(params)))
+            raise ValueError(
+                "Got %d gradient expressions for %d parameters"
+                % (len(loss_or_grads), len(params))
+            )
         return loss_or_grads
     else:
         return theano.grad(loss_or_grads, params)
@@ -162,8 +166,8 @@ def get_or_compute_grads(loss_or_grads, params):
 
 def _get_call_kwargs(_locals_):
     _locals_ = _locals_.copy()
-    _locals_.pop('loss_or_grads')
-    _locals_.pop('params')
+    _locals_.pop("loss_or_grads")
+    _locals_.pop("params")
     return _locals_
 
 
@@ -211,7 +215,8 @@ def sgd(loss_or_grads=None, params=None, learning_rate=1e-3):
         return partial(sgd, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
         raise ValueError(
-            'Please provide both `loss_or_grads` and `params` to get updates')
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     grads = get_or_compute_grads(loss_or_grads, params)
     updates = OrderedDict()
 
@@ -260,8 +265,9 @@ def apply_momentum(updates, params=None, momentum=0.9):
 
     for param in params:
         value = param.get_value(borrow=True)
-        velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                                 broadcastable=param.broadcastable)
+        velocity = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
         x = momentum * velocity + updates[param]
         updates[velocity] = x - param
         updates[param] = x
@@ -269,8 +275,7 @@ def apply_momentum(updates, params=None, momentum=0.9):
     return updates
 
 
-def momentum(loss_or_grads=None, params=None,
-             learning_rate=1e-3, momentum=0.9):
+def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9):
     """Stochastic Gradient Descent (SGD) updates with momentum
 
     Generates update expressions of the form:
@@ -326,7 +331,8 @@ def momentum(loss_or_grads=None, params=None,
         return partial(pm.updates.momentum, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
         raise ValueError(
-            'Please provide both `loss_or_grads` and `params` to get updates')
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     updates = sgd(loss_or_grads, params, learning_rate)
     return apply_momentum(updates, momentum=momentum)
 
@@ -376,8 +382,9 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9):
 
     for param in params:
         value = param.get_value(borrow=True)
-        velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                                 broadcastable=param.broadcastable)
+        velocity = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
         x = momentum * velocity + updates[param] - param
         updates[velocity] = x
         updates[param] = momentum * x + updates[param]
@@ -385,8 +392,9 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9):
     return updates
 
 
-def nesterov_momentum(loss_or_grads=None, params=None,
-                      learning_rate=1e-3, momentum=0.9):
+def nesterov_momentum(
+    loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9
+):
     """Stochastic Gradient Descent (SGD) updates with Nesterov momentum
 
     Generates update expressions of the form:
@@ -447,7 +455,8 @@ def nesterov_momentum(loss_or_grads=None, params=None,
         return partial(nesterov_momentum, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
         raise ValueError(
-            'Please provide both `loss_or_grads` and `params` to get updates')
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     updates = sgd(loss_or_grads, params, learning_rate)
     return apply_nesterov_momentum(updates, momentum=momentum)
 
@@ -516,24 +525,26 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6):
         return partial(adagrad, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
         raise ValueError(
-            'Please provide both `loss_or_grads` and `params` to get updates')
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     grads = get_or_compute_grads(loss_or_grads, params)
     updates = OrderedDict()
 
     for param, grad in zip(params, grads):
         value = param.get_value(borrow=True)
-        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                             broadcastable=param.broadcastable)
+        accu = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
         accu_new = accu + grad ** 2
         updates[accu] = accu_new
-        updates[param] = param - (learning_rate * grad /
-                                  tt.sqrt(accu_new + epsilon))
+        updates[param] = param - (learning_rate * grad / tt.sqrt(accu_new + epsilon))
 
     return updates
 
 
-def adagrad_window(loss_or_grads=None, params=None,
-                   learning_rate=0.001, epsilon=.1, n_win=10):
+def adagrad_window(
+    loss_or_grads=None, params=None, learning_rate=0.001, epsilon=0.1, n_win=10
+):
     """Returns a function that returns parameter updates.
     Instead of accumulated estimate, uses running window
 
@@ -558,15 +569,16 @@ def adagrad_window(loss_or_grads=None, params=None,
     if loss_or_grads is None and params is None:
         return partial(adagrad_window, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
-        raise ValueError('Please provide both `loss_or_grads` and `params` to get updates')
+        raise ValueError(
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     grads = get_or_compute_grads(loss_or_grads, params)
     updates = OrderedDict()
     for param, grad in zip(params, grads):
         i = theano.shared(pm.floatX(0))
-        i_int = i.astype('int32')
+        i_int = i.astype("int32")
         value = param.get_value(borrow=True)
-        accu = theano.shared(
-            np.zeros(value.shape + (n_win,), dtype=value.dtype))
+        accu = theano.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype))
 
         # Append squared gradient vector to accu_new
         accu_new = tt.set_subtensor(accu[..., i_int], grad ** 2)
@@ -575,13 +587,11 @@ def adagrad_window(loss_or_grads=None, params=None,
         updates[i] = i_new
 
         accu_sum = accu_new.sum(axis=-1)
-        updates[param] = param - (learning_rate * grad /
-                                  tt.sqrt(accu_sum + epsilon))
+        updates[param] = param - (learning_rate * grad / tt.sqrt(accu_sum + epsilon))
     return updates
 
 
-def rmsprop(loss_or_grads=None, params=None,
-            learning_rate=1.0, rho=0.9, epsilon=1e-6):
+def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon=1e-6):
     """RMSProp updates
 
     Scale learning rates by dividing with the moving average of the root mean
@@ -646,7 +656,8 @@ def rmsprop(loss_or_grads=None, params=None,
         return partial(rmsprop, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
         raise ValueError(
-            'Please provide both `loss_or_grads` and `params` to get updates')
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     grads = get_or_compute_grads(loss_or_grads, params)
     updates = OrderedDict()
 
@@ -655,18 +666,19 @@ def rmsprop(loss_or_grads=None, params=None,
 
     for param, grad in zip(params, grads):
         value = param.get_value(borrow=True)
-        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                             broadcastable=param.broadcastable)
+        accu = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
         accu_new = rho * accu + (one - rho) * grad ** 2
         updates[accu] = accu_new
-        updates[param] = param - (learning_rate * grad /
-                                  tt.sqrt(accu_new + epsilon))
+        updates[param] = param - (learning_rate * grad / tt.sqrt(accu_new + epsilon))
 
     return updates
 
 
-def adadelta(loss_or_grads=None, params=None,
-             learning_rate=1.0, rho=0.95, epsilon=1e-6):
+def adadelta(
+    loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsilon=1e-6
+):
     """ Adadelta updates
 
     Scale learning rates by the ratio of accumulated gradients to accumulated
@@ -740,7 +752,8 @@ def adadelta(loss_or_grads=None, params=None,
         return partial(adadelta, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
         raise ValueError(
-            'Please provide both `loss_or_grads` and `params` to get updates')
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     grads = get_or_compute_grads(loss_or_grads, params)
     updates = OrderedDict()
 
@@ -750,19 +763,20 @@ def adadelta(loss_or_grads=None, params=None,
     for param, grad in zip(params, grads):
         value = param.get_value(borrow=True)
         # accu: accumulate gradient magnitudes
-        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                             broadcastable=param.broadcastable)
+        accu = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
         # delta_accu: accumulate update magnitudes (recursively!)
-        delta_accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                                   broadcastable=param.broadcastable)
+        delta_accu = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
 
         # update accu (as in rmsprop)
         accu_new = rho * accu + (one - rho) * grad ** 2
         updates[accu] = accu_new
 
         # compute parameter update, using the 'old' delta_accu
-        update = (grad * tt.sqrt(delta_accu + epsilon) /
-                  tt.sqrt(accu_new + epsilon))
+        update = grad * tt.sqrt(delta_accu + epsilon) / tt.sqrt(accu_new + epsilon)
         updates[param] = param - learning_rate * update
 
         # update delta_accu (as accu, but accumulating updates)
@@ -772,8 +786,14 @@ def adadelta(loss_or_grads=None, params=None,
     return updates
 
 
-def adam(loss_or_grads=None, params=None, learning_rate=0.001, beta1=0.9,
-         beta2=0.999, epsilon=1e-8):
+def adam(
+    loss_or_grads=None,
+    params=None,
+    learning_rate=0.001,
+    beta1=0.9,
+    beta2=0.999,
+    epsilon=1e-8,
+):
     """Adam updates
 
     Adam updates implemented as in [1]_.
@@ -831,26 +851,29 @@ def adam(loss_or_grads=None, params=None, learning_rate=0.001, beta1=0.9,
         return partial(adam, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
         raise ValueError(
-            'Please provide both `loss_or_grads` and `params` to get updates')
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     all_grads = get_or_compute_grads(loss_or_grads, params)
-    t_prev = theano.shared(pm.theanof.floatX(0.))
+    t_prev = theano.shared(pm.theanof.floatX(0.0))
     updates = OrderedDict()
 
     # Using theano constant to prevent upcasting of float32
     one = tt.constant(1)
 
     t = t_prev + 1
-    a_t = learning_rate * tt.sqrt(one - beta2**t) / (one - beta1**t)
+    a_t = learning_rate * tt.sqrt(one - beta2 ** t) / (one - beta1 ** t)
 
     for param, g_t in zip(params, all_grads):
         value = param.get_value(borrow=True)
-        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                               broadcastable=param.broadcastable)
-        v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                               broadcastable=param.broadcastable)
+        m_prev = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
+        v_prev = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
 
         m_t = beta1 * m_prev + (one - beta1) * g_t
-        v_t = beta2 * v_prev + (one - beta2) * g_t**2
+        v_t = beta2 * v_prev + (one - beta2) * g_t ** 2
         step = a_t * m_t / (tt.sqrt(v_t) + epsilon)
 
         updates[m_prev] = m_t
@@ -861,8 +884,14 @@ def adam(loss_or_grads=None, params=None, learning_rate=0.001, beta1=0.9,
     return updates
 
 
-def adamax(loss_or_grads=None, params=None, learning_rate=0.002, beta1=0.9,
-           beta2=0.999, epsilon=1e-8):
+def adamax(
+    loss_or_grads=None,
+    params=None,
+    learning_rate=0.002,
+    beta1=0.9,
+    beta2=0.999,
+    epsilon=1e-8,
+):
     """Adamax updates
 
     Adamax updates implemented as in [1]_. This is a variant of of the Adam
@@ -917,23 +946,26 @@ def adamax(loss_or_grads=None, params=None, learning_rate=0.002, beta1=0.9,
         return partial(adamax, **_get_call_kwargs(locals()))
     elif loss_or_grads is None or params is None:
         raise ValueError(
-            'Please provide both `loss_or_grads` and `params` to get updates')
+            "Please provide both `loss_or_grads` and `params` to get updates"
+        )
     all_grads = get_or_compute_grads(loss_or_grads, params)
-    t_prev = theano.shared(pm.theanof.floatX(0.))
+    t_prev = theano.shared(pm.theanof.floatX(0.0))
     updates = OrderedDict()
 
     # Using theano constant to prevent upcasting of float32
     one = tt.constant(1)
 
     t = t_prev + 1
-    a_t = learning_rate / (one - beta1**t)
+    a_t = learning_rate / (one - beta1 ** t)
 
     for param, g_t in zip(params, all_grads):
         value = param.get_value(borrow=True)
-        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                               broadcastable=param.broadcastable)
-        u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                               broadcastable=param.broadcastable)
+        m_prev = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
+        u_prev = theano.shared(
+            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+        )
 
         m_t = beta1 * m_prev + (one - beta1) * g_t
         u_t = tt.maximum(beta2 * u_prev, abs(g_t))
@@ -1021,14 +1053,12 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7):
     dtype = np.dtype(theano.config.floatX).type
     norms = tt.sqrt(tt.sum(tt.sqr(tensor_var), axis=sum_over, keepdims=True))
     target_norms = tt.clip(norms, 0, dtype(max_norm))
-    constrained_output = \
-        (tensor_var * (target_norms / (dtype(epsilon) + norms)))
+    constrained_output = tensor_var * (target_norms / (dtype(epsilon) + norms))
 
     return constrained_output
 
 
-def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7,
-                          return_norm=False):
+def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False):
     """Rescales a list of tensors based on their combined norm
 
     If the combined norm of the input tensors exceeds the threshold then all
@@ -1083,7 +1113,7 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7,
        learning with neural networks. In Advances in Neural Information
        Processing Systems (pp. 3104-3112).
     """
-    norm = tt.sqrt(sum(tt.sum(tensor**2) for tensor in tensor_vars))
+    norm = tt.sqrt(sum(tt.sum(tensor ** 2) for tensor in tensor_vars))
     dtype = np.dtype(theano.config.floatX).type
     target_norm = tt.clip(norm, 0, dtype(max_norm))
     multiplier = target_norm / (dtype(epsilon) + norm)
diff --git a/pymc3/vartypes.py b/pymc3/vartypes.py
index d0a4f20754..fc8d8e033d 100644
--- a/pymc3/vartypes.py
+++ b/pymc3/vartypes.py
@@ -2,23 +2,24 @@
 
 import six
 
-__all__ = ['bool_types', 'int_types', 'float_types', 'complex_types', 'continuous_types',
-           'discrete_types', 'typefilter', 'isgenerator']
-
-bool_types = set(['int8'])
-
-int_types = set(['int8',
-                 'int16',
-                 'int32',
-                 'int64',
-                 'uint8',
-                 'uint16',
-                 'uint32',
-                 'uint64'])
-float_types = set(['float32',
-                   'float64'])
-complex_types = set(['complex64',
-                     'complex128'])
+__all__ = [
+    "bool_types",
+    "int_types",
+    "float_types",
+    "complex_types",
+    "continuous_types",
+    "discrete_types",
+    "typefilter",
+    "isgenerator",
+]
+
+bool_types = set(["int8"])
+
+int_types = set(
+    ["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"]
+)
+float_types = set(["float32", "float64"])
+complex_types = set(["complex64", "complex128"])
 continuous_types = float_types | complex_types
 discrete_types = bool_types | int_types
 
@@ -34,5 +35,4 @@ def typefilter(vars, types):
 
 
 def isgenerator(obj):
-    return ((hasattr(obj, '__next__') and six.PY3) or
-            (hasattr(obj, 'next') and six.PY2))
+    return (hasattr(obj, "__next__") and six.PY3) or (hasattr(obj, "next") and six.PY2)