diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 29766e87f2d..fb52875f2f4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,3 +7,8 @@ repos:
       name: nbqa-isort
       alias: nbqa-isort
       additional_dependencies: ['isort']
+- repo: https://github.com/asottile/pyupgrade
+  rev: v2.7.2
+  hooks:
+    - id: pyupgrade
+      args: ['--py36-plus']
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 26df368eadd..546b461f101 100755
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 #
 # pymc3 documentation build configuration file, created by
 # sphinx-quickstart on Sat Dec 26 14:40:23 2015.
diff --git a/docs/source/sphinxext/gallery_generator.py b/docs/source/sphinxext/gallery_generator.py
index f597b666b94..2e9c716a8fb 100644
--- a/docs/source/sphinxext/gallery_generator.py
+++ b/docs/source/sphinxext/gallery_generator.py
@@ -70,13 +70,13 @@ def __init__(self, filename, target_dir):
         self.basename = os.path.basename(filename)
         self.stripped_name = os.path.splitext(self.basename)[0]
         self.output_html = os.path.join(
-            "..", "notebooks", "{}.html".format(self.stripped_name)
+            "..", "notebooks", f"{self.stripped_name}.html"
         )
         self.image_dir = os.path.join(target_dir, "_images")
         self.png_path = os.path.join(
-            self.image_dir, "{}.png".format(self.stripped_name)
+            self.image_dir, f"{self.stripped_name}.png"
         )
-        with open(filename, "r") as fid:
+        with open(filename) as fid:
             self.json_source = json.load(fid)
         self.pagetitle = self.extract_title()
         self.default_image_loc = DEFAULT_IMG_LOC
@@ -89,7 +89,7 @@ def __init__(self, filename, target_dir):
 
             self.gen_previews()
         else:
-            print("skipping {0}".format(filename))
+            print(f"skipping {filename}")
 
     def extract_preview_pic(self):
         """By default, just uses the last image in the notebook."""
@@ -136,7 +136,7 @@ def build_gallery(srcdir, gallery):
     working_dir = os.getcwd()
     os.chdir(srcdir)
     static_dir = os.path.join(srcdir, "_static")
-    target_dir = os.path.join(srcdir, "nb_{}".format(gallery))
+    target_dir = os.path.join(srcdir, f"nb_{gallery}")
     image_dir = os.path.join(target_dir, "_images")
     source_dir = os.path.abspath(
         os.path.join(os.path.dirname(os.path.dirname(srcdir)), "notebooks")
@@ -182,8 +182,8 @@ def build_gallery(srcdir, gallery):
                 "thumb": os.path.basename(default_png_path),
             }
 
-    js_file = os.path.join(image_dir, "gallery_{}_contents.js".format(gallery))
-    with open(table_of_contents_file, "r") as toc:
+    js_file = os.path.join(image_dir, f"gallery_{gallery}_contents.js")
+    with open(table_of_contents_file) as toc:
         table_of_contents = toc.read()
 
     js_contents = "Gallery.examples = {}\n{}".format(
diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py
index a08abacb83d..07c17eb4e75 100644
--- a/pymc3/backends/base.py
+++ b/pymc3/backends/base.py
@@ -347,8 +347,8 @@ def __getitem__(self, idx):
             return self.get_sampler_stats(var, burn=burn, thin=thin)
         raise KeyError("Unknown variable %s" % var)
 
-    _attrs = set(['_straces', 'varnames', 'chains', 'stat_names',
-                  'supports_sampler_stats', '_report'])
+    _attrs = {'_straces', 'varnames', 'chains', 'stat_names',
+                  'supports_sampler_stats', '_report'}
 
     def __getattr__(self, name):
         # Avoid infinite recursion when called before __init__
@@ -417,7 +417,7 @@ def add_values(self, vals, overwrite=False) -> None:
                     self.varnames.remove(k)
                     new_var = 0
                 else:
-                    raise ValueError("Variable name {} already exists.".format(k))
+                    raise ValueError(f"Variable name {k} already exists.")
 
             self.varnames.append(k)
 
@@ -448,7 +448,7 @@ def remove_values(self, name):
         """
         varnames = self.varnames
         if name not in varnames:
-            raise KeyError("Unknown variable {}".format(name))
+            raise KeyError(f"Unknown variable {name}")
         self.varnames.remove(name)
         chains = self._straces
         for chain in chains.values():
diff --git a/pymc3/backends/hdf5.py b/pymc3/backends/hdf5.py
index a5ec2a173af..f3d2da2b795 100644
--- a/pymc3/backends/hdf5.py
+++ b/pymc3/backends/hdf5.py
@@ -140,7 +140,7 @@ def sampler_vars(self, values):
                             data.create_dataset(varname, (self.draws,), dtype=dtype, maxshape=(None,))
                 elif data.keys() != sampler.keys():
                     raise ValueError(
-                        "Sampler vars can't change, names incompatible: {} != {}".format(data.keys(), sampler.keys()))
+                        f"Sampler vars can't change, names incompatible: {data.keys()} != {sampler.keys()}")
             self.records_stats = True
 
     def setup(self, draws, chain, sampler_vars=None):
diff --git a/pymc3/backends/ndarray.py b/pymc3/backends/ndarray.py
index 27df021e917..035e29f08ab 100644
--- a/pymc3/backends/ndarray.py
+++ b/pymc3/backends/ndarray.py
@@ -176,7 +176,7 @@ def load(self, model: Model) -> 'NDArray':
             raise TraceDirectoryError("%s is not a trace directory" % self.directory)
 
         new_trace = NDArray(model=model)
-        with open(self.metadata_path, 'r') as buff:
+        with open(self.metadata_path) as buff:
             metadata = json.load(buff)
 
         metadata['_stats'] = [{k: np.array(v) for k, v in stat.items()} for stat in metadata['_stats']]
diff --git a/pymc3/backends/sqlite.py b/pymc3/backends/sqlite.py
index ad9bd3d0a13..e533aa0baf3 100644
--- a/pymc3/backends/sqlite.py
+++ b/pymc3/backends/sqlite.py
@@ -340,8 +340,8 @@ def load(name, model=None):
     db.connect()
     varnames = _get_table_list(db.cursor)
     if len(varnames) == 0:
-        raise ValueError(('Can not get variable list for database'
-                          '`{}`'.format(name)))
+        raise ValueError('Can not get variable list for database '
+                         f'`{name}`')
     chains = _get_chain_list(db.cursor, varnames[0])
 
     straces = []
@@ -367,14 +367,14 @@ def _get_table_list(cursor):
 
 
 def _get_var_strs(cursor, varname):
-    cursor.execute('SELECT * FROM [{}]'.format(varname))
+    cursor.execute(f'SELECT * FROM [{varname}]')
     col_names = (col_descr[0] for col_descr in cursor.description)
     return [name for name in col_names if name.startswith('v')]
 
 
 def _get_chain_list(cursor, varname):
     """Return a list of sorted chains for `varname`."""
-    cursor.execute('SELECT DISTINCT chain FROM [{}]'.format(varname))
+    cursor.execute(f'SELECT DISTINCT chain FROM [{varname}]')
     chains = sorted([chain[0] for chain in cursor.fetchall()])
     return chains
 
diff --git a/pymc3/backends/text.py b/pymc3/backends/text.py
index 40cf3775231..129149336c7 100644
--- a/pymc3/backends/text.py
+++ b/pymc3/backends/text.py
@@ -91,7 +91,7 @@ def setup(self, draws, chain):
             self._fh.close()
 
         self.chain = chain
-        self.filename = os.path.join(self.name, 'chain-{}.csv'.format(chain))
+        self.filename = os.path.join(self.name, f'chain-{chain}.csv')
 
         cnames = [fv for v in self.varnames for fv in self.flat_names[v]]
 
@@ -201,7 +201,7 @@ def load(name, model=None):
     files = glob(os.path.join(name, 'chain-*.csv'))
 
     if len(files) == 0:
-        raise ValueError('No files present in directory {}'.format(name))
+        raise ValueError(f'No files present in directory {name}')
 
     straces = []
     for f in files:
@@ -249,7 +249,7 @@ def dump(name, trace, chains=None):
         chains = trace.chains
 
     for chain in chains:
-        filename = os.path.join(name, 'chain-{}.csv'.format(chain))
+        filename = os.path.join(name, f'chain-{chain}.csv')
         df = ttab.trace_to_dataframe(
             trace, chains=chain, include_transformed=True)
         df.to_csv(filename, index=False)
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index b7f39a177f4..5ccdb7f5d25 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -134,7 +134,7 @@ def assert_negative_support(var, label, distname, value=-1e-6):
             support = False
 
     if np.any(support):
-        msg = "The variable specified for {0} has negative support for {1}, ".format(
+        msg = "The variable specified for {} has negative support for {}, ".format(
             label, distname
         )
         msg += "likely making it unsuitable for this parameter."
@@ -294,7 +294,7 @@ def logcdf(self, value):
             tt.switch(
                 tt.eq(value, self.upper),
                 0,
-                tt.log((value - self.lower)) - tt.log((self.upper - self.lower)),
+                tt.log(value - self.lower) - tt.log(self.upper - self.lower),
             ),
         )
 
@@ -1887,7 +1887,6 @@ class StudentT(Continuous):
 
     def __init__(self, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        super(StudentT, self).__init__(*args, **kwargs)
         if sd is not None:
             sigma = sd
             warnings.warn("sd is deprecated, use sigma instead", DeprecationWarning)
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index e39c109b6bb..50b0ae5de98 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -60,7 +60,7 @@ def __new__(cls, name, *args, **kwargs):
                             "for a standalone distribution.")
 
         if not isinstance(name, string_types):
-            raise TypeError("Name needs to be a string but got: {}".format(name))
+            raise TypeError(f"Name needs to be a string but got: {name}")
 
         data = kwargs.pop('observed', None)
         cls.data = data
@@ -728,7 +728,7 @@ def draw_values(params, point=None, size=None):
         # test_distributions_random::TestDrawValues::test_draw_order fails without it
         # The remaining params that must be drawn are all hashable
         to_eval = set()
-        missing_inputs = set([j for j, p in symbolic_params])
+        missing_inputs = {j for j, p in symbolic_params}
         while to_eval or missing_inputs:
             if to_eval == missing_inputs:
                 raise ValueError('Cannot resolve inputs for {}'.format([get_var_name(params[j]) for j in to_eval]))
@@ -828,7 +828,7 @@ def vectorize_theano_function(f, inputs, output):
     """
     inputs_signatures = ",".join(
         [
-            get_vectorize_signature(var, var_name="i_{}".format(input_ind))
+            get_vectorize_signature(var, var_name=f"i_{input_ind}")
             for input_ind, var in enumerate(inputs)
         ]
     )
@@ -846,9 +846,9 @@ def get_vectorize_signature(var, var_name="i"):
         return "()"
     else:
         sig = ",".join(
-            ["{}_{}".format(var_name, axis_ind) for axis_ind in range(var.ndim)]
+            [f"{var_name}_{axis_ind}" for axis_ind in range(var.ndim)]
         )
-        return "({})".format(sig)
+        return f"({sig})"
 
 
 def _draw_value(param, point=None, givens=None, size=None):
diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index 3169aabe52c..421365d82b8 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -118,7 +118,7 @@ def __init__(self, w, comp_dists, *args, **kwargs):
             isinstance(comp_dists, Distribution)
             or (
                 isinstance(comp_dists, Iterable)
-                and all((isinstance(c, Distribution) for c in comp_dists))
+                and all(isinstance(c, Distribution) for c in comp_dists)
             )
         ):
             raise TypeError(
diff --git a/pymc3/distributions/posterior_predictive.py b/pymc3/distributions/posterior_predictive.py
index ff54f94da2d..571a293d46f 100644
--- a/pymc3/distributions/posterior_predictive.py
+++ b/pymc3/distributions/posterior_predictive.py
@@ -88,7 +88,7 @@ def __init__(
             assert point_list is None and dict is None
             self.data = {}  # Dict[str, np.ndarray]
             self._len = sum(
-                (len(multi_trace._straces[chain]) for chain in multi_trace.chains)
+                len(multi_trace._straces[chain]) for chain in multi_trace.chains
             )
             self.varnames = multi_trace.varnames
             for vn in multi_trace.varnames:
@@ -153,7 +153,7 @@ def __getitem__(self, item: Union[slice, int]) -> "_TraceDict":
 
     def __getitem__(self, item):
         if isinstance(item, str):
-            return super(_TraceDict, self).__getitem__(item)
+            return super().__getitem__(item)
         elif isinstance(item, slice):
             return self._extract_slice(item)
         elif isinstance(item, int):
@@ -161,7 +161,7 @@ def __getitem__(self, item):
                 dict={k: np.atleast_1d(v[item]) for k, v in self.data.items()}
             )
         elif hasattr(item, "name"):
-            return super(_TraceDict, self).__getitem__(item.name)
+            return super().__getitem__(item.name)
         else:
             raise IndexError("Illegal index %s for _TraceDict" % str(item))
 
@@ -242,7 +242,7 @@ def fast_sample_posterior_predictive(
                 "Should not specify both keep_size and samples arguments"
             )
 
-        if isinstance(trace, list) and all((isinstance(x, dict) for x in trace)):
+        if isinstance(trace, list) and all(isinstance(x, dict) for x in trace):
             _trace = _TraceDict(point_list=trace)
         elif isinstance(trace, MultiTrace):
             _trace = _TraceDict(multi_trace=trace)
@@ -454,7 +454,7 @@ def draw_values(self) -> List[np.ndarray]:
             # test_distributions_random::TestDrawValues::test_draw_order fails without it
             # The remaining params that must be drawn are all hashable
             to_eval: Set[int] = set()
-            missing_inputs: Set[int] = set([j for j, p in self.symbolic_params])
+            missing_inputs: Set[int] = {j for j, p in self.symbolic_params}
 
             while to_eval or missing_inputs:
                 if to_eval == missing_inputs:
diff --git a/pymc3/distributions/shape_utils.py b/pymc3/distributions/shape_utils.py
index 66c251dfff4..b0ff463a05d 100644
--- a/pymc3/distributions/shape_utils.py
+++ b/pymc3/distributions/shape_utils.py
@@ -58,14 +58,14 @@ def _check_shape_type(shape):
         shape = np.atleast_1d(shape)
         for s in shape:
             if isinstance(s, np.ndarray) and s.ndim > 0:
-                raise TypeError("Value {} is not a valid integer".format(s))
+                raise TypeError(f"Value {s} is not a valid integer")
             o = int(s)
             if o != s:
-                raise TypeError("Value {} is not a valid integer".format(s))
+                raise TypeError(f"Value {s} is not a valid integer")
             out.append(o)
     except Exception:
         raise TypeError(
-            "Supplied value {} does not represent a valid shape".format(shape)
+            f"Supplied value {shape} does not represent a valid shape"
         )
     return tuple(out)
 
@@ -103,7 +103,7 @@ def shapes_broadcasting(*args, raise_exception=False):
             if raise_exception:
                 raise ValueError(
                     "Supplied shapes {} do not broadcast together".format(
-                        ", ".join(["{}".format(a) for a in args])
+                        ", ".join([f"{a}" for a in args])
                     )
                 )
             else:
@@ -165,7 +165,7 @@ def broadcast_dist_samples_shape(shapes, size=None):
         if broadcasted_shape is None:
             raise ValueError(
                 "Cannot broadcast provided shapes {} given size: {}".format(
-                    ", ".join(["{}".format(s) for s in shapes]), size
+                    ", ".join([f"{s}" for s in shapes]), size
                 )
             )
         return broadcasted_shape
@@ -181,7 +181,7 @@ def broadcast_dist_samples_shape(shapes, size=None):
     except ValueError:
         raise ValueError(
             "Cannot broadcast provided shapes {} given size: {}".format(
-                ", ".join(["{}".format(s) for s in shapes]), size
+                ", ".join([f"{s}" for s in shapes]), size
             )
         )
     broadcastable_shapes = []
diff --git a/pymc3/distributions/simulator.py b/pymc3/distributions/simulator.py
index e0e8e456fdc..026fc2f9bb6 100644
--- a/pymc3/distributions/simulator.py
+++ b/pymc3/distributions/simulator.py
@@ -32,7 +32,7 @@ def __init__(
         epsilon=1,
         **kwargs,
     ):
-        """
+        r"""
         This class stores a function defined by the user in Python language.
 
         function: function
@@ -125,7 +125,7 @@ def _str_repr(self, name=None, dist=None, formatting="plain"):
         distance = self.distance.__name__
 
         if formatting == "latex":
-            return f"$\\text{{{name}}} \sim  \\text{{Simulator}}(\\text{{{function}}}({params}), \\text{{{distance}}}, \\text{{{sum_stat}}})$"
+            return f"$\\text{{{name}}} \\sim  \\text{{Simulator}}(\\text{{{function}}}({params}), \\text{{{distance}}}, \\text{{{sum_stat}}})$"
         else:
             return f"{name} ~ Simulator({function}({params}), {distance}, {sum_stat})"
 
diff --git a/pymc3/examples/samplers_mvnormal.py b/pymc3/examples/samplers_mvnormal.py
index 2d5e919cbb5..669d9279527 100644
--- a/pymc3/examples/samplers_mvnormal.py
+++ b/pymc3/examples/samplers_mvnormal.py
@@ -53,7 +53,7 @@ def run(steppers, p):
             print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains))
             traces[name] = mt
             en = pm.ess(mt)
-            print('effective: {}\r\n'.format(en))
+            print(f'effective: {en}\r\n')
             if USE_XY:
                 effn[name] = np.mean(en['x']) / len(mt) / mt.nchains
             else:
diff --git a/pymc3/exceptions.py b/pymc3/exceptions.py
index a9d02d94a92..62e8e278030 100644
--- a/pymc3/exceptions.py
+++ b/pymc3/exceptions.py
@@ -45,11 +45,11 @@ class ShapeError(Exception):
     """Error that the shape of a variable is incorrect."""
     def __init__(self, message, actual=None, expected=None):
         if actual is not None and expected is not None:
-            super().__init__('{} (actual {} != expected {})'.format(message, actual, expected))
+            super().__init__(f'{message} (actual {actual} != expected {expected})')
         elif actual is not None and expected is None:
-            super().__init__('{} (actual {})'.format(message, actual))
+            super().__init__(f'{message} (actual {actual})')
         elif actual is None and expected is not None:
-            super().__init__('{} (expected {})'.format(message, expected))
+            super().__init__(f'{message} (expected {expected})')
         else:
             super().__init__(message)
 
@@ -58,10 +58,10 @@ class DtypeError(TypeError):
     """Error that the dtype of a variable is incorrect."""
     def __init__(self, message, actual=None, expected=None):
         if actual is not None and expected is not None:
-            super().__init__('{} (actual {} != expected {})'.format(message, actual, expected))
+            super().__init__(f'{message} (actual {actual} != expected {expected})')
         elif actual is not None and expected is None:
-            super().__init__('{} (actual {})'.format(message, actual))
+            super().__init__(f'{message} (actual {actual})')
         elif actual is None and expected is not None:
-            super().__init__('{} (expected {})'.format(message, expected))
+            super().__init__(f'{message} (expected {expected})')
         else:
             super().__init__(message)
diff --git a/pymc3/glm/families.py b/pymc3/glm/families.py
index 42a5ca04365..82f391ed9e9 100644
--- a/pymc3/glm/families.py
+++ b/pymc3/glm/families.py
@@ -62,14 +62,14 @@ def _get_priors(self, model=None, name=''):
         dict: mapping name -> pymc3 distribution
         """
         if name:
-            name = '{}_'.format(name)
+            name = f'{name}_'
         model = modelcontext(model)
         priors = {}
         for key, val in self.priors.items():
             if isinstance(val, (numbers.Number, np.ndarray, np.generic)):
                 priors[key] = val
             else:
-                priors[key] = model.Var('{}{}'.format(name, key), val)
+                priors[key] = model.Var(f'{name}{key}', val)
 
         return priors
 
@@ -87,8 +87,8 @@ def create_likelihood(self, name, y_est, y_data, model=None):
         # Wrap y_est in link function
         priors[self.parent] = self.link(y_est)
         if name:
-            name = '{}_'.format(name)
-        return self.likelihood('{}y'.format(name), observed=y_data, **priors)
+            name = f'{name}_'
+        return self.likelihood(f'{name}y', observed=y_data, **priors)
 
     def __repr__(self):
         return """Family {klass}:
diff --git a/pymc3/gp/util.py b/pymc3/gp/util.py
index 499b9729fdb..cc2c73d8dfb 100644
--- a/pymc3/gp/util.py
+++ b/pymc3/gp/util.py
@@ -44,10 +44,10 @@ def kmeans_inducing_points(n_inducing, X):
     elif isinstance(X, (np.ndarray, tuple, list)):
         X = np.asarray(X)
     else:
-        raise TypeError(("To use K-means initialization, "
+        raise TypeError("To use K-means initialization, "
                          "please provide X as a type that "
                          "can be cast to np.ndarray, instead "
-                         "of {}".format(type(X))))
+                         "of {}".format(type(X)))
     scaling = np.std(X, 0)
     # if std of a column is very small (zero), don't normalize that column
     scaling[scaling <= 1e-6] = 1.0
@@ -63,9 +63,9 @@ def make_getter(name):
             def getter(self):
                 value = getattr(self, name, None)
                 if value is None:
-                    raise AttributeError(("'{}' not set.  Provide as argument "
+                    raise AttributeError("'{}' not set.  Provide as argument "
                                           "to condition, or call 'prior' "
-                                          "first".format(name.lstrip("_"))))
+                                          "first".format(name.lstrip("_")))
                 else:
                     return value
                 return getattr(self, name)
diff --git a/pymc3/math.py b/pymc3/math.py
index 2a44453cfbb..770dbeebe2a 100644
--- a/pymc3/math.py
+++ b/pymc3/math.py
@@ -132,7 +132,7 @@ def kron_vector_op(v):
     if m.ndim == 1:
         m = m[:, None]  # Treat 1D array as Nx1 matrix
     if m.ndim != 2:  # Has not been tested otherwise
-        raise ValueError("m must have ndim <= 2, not {}".format(m.ndim))
+        raise ValueError(f"m must have ndim <= 2, not {m.ndim}")
     res = kron_vector_op(m)
     res_shape = res.shape
     return tt.reshape(res, (res_shape[1], res_shape[0])).T
@@ -261,7 +261,7 @@ def perform(self, node, inputs, outputs, params=None):
             log_det = np.sum(np.log(np.abs(s)))
             z[0] = np.asarray(log_det, dtype=x.dtype)
         except Exception:
-            print("Failed to compute logdet of {}.".format(x))
+            print(f"Failed to compute logdet of {x}.")
             raise
 
     def grad(self, inputs, g_outputs):
@@ -383,7 +383,7 @@ class BlockDiagonalMatrix(Op):
     def __init__(self, sparse=False, format="csr"):
         if format not in ("csr", "csc"):
             raise ValueError(
-                "format must be one of: 'csr', 'csc', got {}".format(format)
+                f"format must be one of: 'csr', 'csc', got {format}"
             )
         self.sparse = sparse
         self.format = format
diff --git a/pymc3/model.py b/pymc3/model.py
index 577e8833caa..b43b7f9d380 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -124,7 +124,7 @@ def incorporate_methods(source, destination, methods, wrapper=None, override=Fal
     for method in methods:
         if hasattr(destination, method) and not override:
             raise AttributeError(
-                "Cannot add method {!r}".format(method)
+                f"Cannot add method {method!r} "
                 + "to destination object as it already exists. "
                 "To prevent this error set 'override=True'."
             )
@@ -628,7 +628,7 @@ def __init__(
 
         self._grad_vars = grad_vars
         self._extra_vars = extra_vars
-        self._extra_var_names = set(var.name for var in extra_vars)
+        self._extra_var_names = {var.name for var in extra_vars}
         self._cost = cost
         self._ordering = ArrayOrdering(grad_vars)
         self.size = self._ordering.size
@@ -729,7 +729,7 @@ def array_to_dict(self, array):
         """Convert an array to a dictionary containing the grad_vars."""
         if array.shape != (self.size,):
             raise ValueError(
-                "Array should have shape (%s,) but has %s" % (self.size, array.shape)
+                f"Array should have shape ({self.size},) but has {array.shape}"
             )
         if array.dtype != self.dtype:
             raise ValueError(
@@ -1147,7 +1147,7 @@ def Var(self, name, dist, data=None, total_size=None, dims=None):
     def add_random_variable(self, var, dims=None):
         """Add a random variable to the named variables of the model."""
         if self.named_vars.tree_contains(var.name):
-            raise ValueError("Variable name {} already exists.".format(var.name))
+            raise ValueError(f"Variable name {var.name} already exists.")
 
         if dims is not None:
             if isinstance(dims, str):
@@ -1168,7 +1168,7 @@ def name_for(self, name):
         """
         if self.prefix:
             if not name.startswith(self.prefix):
-                return "{}{}".format(self.prefix, name)
+                return f"{self.prefix}{name}"
             else:
                 return name
         else:
@@ -1481,11 +1481,11 @@ def Point(*args, **kwargs):
     try:
         d = dict(*args, **kwargs)
     except Exception as e:
-        raise TypeError("can't turn {} and {} into a dict. {}".format(args, kwargs, e))
-    return dict(
-        (get_var_name(k), np.array(v)) for k, v in d.items()
+        raise TypeError(f"can't turn {args} and {kwargs} into a dict. {e}")
+    return {
+        get_var_name(k): np.array(v) for k, v in d.items()
          if get_var_name(k) in map(get_var_name, model.vars)
-    )
+    }
 
 
 class FastPointFunc:
diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py
index 4f67b624dad..93fc3a8aab3 100644
--- a/pymc3/model_graph.py
+++ b/pymc3/model_graph.py
@@ -114,8 +114,8 @@ def update_input_map(key: str, val: Set[VarName]):
                 try:
                     obs_name = var.observations.name
                     if obs_name:
-                        input_map[var_name] = input_map[var_name].difference(set([obs_name]))
-                        update_input_map(obs_name, set([var_name]))
+                        input_map[var_name] = input_map[var_name].difference({obs_name})
+                        update_input_map(obs_name, {var_name})
                 except AttributeError:
                     pass
         return input_map
@@ -147,7 +147,7 @@ def _make_node(self, var_name, graph):
             attrs['shape'] = 'box'
 
         graph.node(var_name.replace(':', '&'),
-                '{var_name}\n~\n{distribution}'.format(var_name=var_name, distribution=distribution),
+                f'{var_name}\n~\n{distribution}',
                 **attrs)
 
     def get_plates(self):
diff --git a/pymc3/ode/ode.py b/pymc3/ode/ode.py
index efec33873e4..21074e4822e 100644
--- a/pymc3/ode/ode.py
+++ b/pymc3/ode/ode.py
@@ -149,7 +149,7 @@ def __call__(self, y0, theta, return_sens=False, **kwargs):
         inputs = [y0, theta]
         for i, (input_val, itype) in enumerate(zip(inputs, self._itypes)):
             if not input_val.type == itype:
-                raise ValueError('Input {} of type {} does not have the expected type of {}'.format(i, input_val.type, itype))
+                raise ValueError(f'Input {i} of type {input_val.type} does not have the expected type of {itype}')
         
         # use default implementation to prepare symbolic outputs (via make_node)
         states, sens = super(theano.Op, self).__call__(y0, theta, **kwargs)
diff --git a/pymc3/plots/__init__.py b/pymc3/plots/__init__.py
index a8077d53e27..56a3442678f 100644
--- a/pymc3/plots/__init__.py
+++ b/pymc3/plots/__init__.py
@@ -32,7 +32,7 @@ def map_args(func):
     def wrapped(*args, **kwargs):
         for (old, new) in swaps:
             if old in kwargs and new not in kwargs:
-                warnings.warn('Keyword argument `{old}` renamed to `{new}`, and will be removed in pymc3 3.8'.format(old=old, new=new))
+                warnings.warn(f'Keyword argument `{old}` renamed to `{new}`, and will be removed in pymc3 3.8')
                 kwargs[new] = kwargs.pop(old)
             return func(*args, **kwargs)
     return wrapped
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 4a061d80ad2..c134555be26 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -230,7 +230,7 @@ def _print_step_hierarchy(s, level=0):
                 for v in s.vars
             ]
         )
-        _log.info(">" * level + "{}: [{}]".format(s.__class__.__name__, varnames))
+        _log.info(">" * level + f"{s.__class__.__name__}: [{varnames}]")
 
 
 def sample(
@@ -536,7 +536,7 @@ def sample(
     parallel = cores > 1 and chains > 1 and not has_population_samplers
     t_start = time.time()
     if parallel:
-        _log.info("Multiprocess sampling ({} chains in {} jobs)".format(chains, cores))
+        _log.info(f"Multiprocess sampling ({chains} chains in {cores} jobs)")
         _print_step_hierarchy(step)
         try:
             trace = _mp_sample(**sample_args, **parallel_args)
@@ -559,7 +559,7 @@ def sample(
                     for m in (step.methods if isinstance(step, CompoundStep) else [step])
                 ]
             )
-            _log.info("Population sampling ({} chains)".format(chains))
+            _log.info(f"Population sampling ({chains} chains)")
             if has_demcmc and chains < 3:
                 raise ValueError(
                     "DEMetropolis requires at least 3 chains. "
@@ -576,7 +576,7 @@ def sample(
             _print_step_hierarchy(step)
             trace = _sample_population(**sample_args, parallelize=cores > 1)
         else:
-            _log.info("Sequential sampling ({} chains in 1 job)".format(chains))
+            _log.info(f"Sequential sampling ({chains} chains in 1 job)")
             _print_step_hierarchy(step)
             trace = _sample_many(**sample_args)
 
@@ -653,7 +653,7 @@ def _check_start_shape(model, start):
                     )
 
     if e != "":
-        raise ValueError("Bad shape for start argument:{}".format(e))
+        raise ValueError(f"Bad shape for start argument:{e}")
 
 
 def _sample_many(
@@ -1055,7 +1055,7 @@ def __init__(self, steppers, parallelize, progressbar=True):
                     process = multiprocessing.Process(
                         target=self.__class__._run_secondary,
                         args=(c, stepper_dumps, secondary_end),
-                        name="ChainWalker{}".format(c),
+                        name=f"ChainWalker{c}",
                     )
                     # we want the child process to exit if the parent is terminated
                     process.daemon = True
@@ -1134,7 +1134,7 @@ def _run_secondary(c, stepper_dumps, secondary_end):
                 update = stepper.step(population[c])
                 secondary_end.send(update)
         except Exception:
-            _log.exception("ChainWalker{}".format(c))
+            _log.exception(f"ChainWalker{c}")
         return
 
     def step(self, tune_stop, population):
@@ -1666,7 +1666,7 @@ def sample_posterior_predictive(
     if samples is None:
         if isinstance(_trace, MultiTrace):
             samples = sum(len(v) for v in _trace._straces.values())
-        elif isinstance(_trace, list) and all((isinstance(x, dict) for x in _trace)):
+        elif isinstance(_trace, list) and all(isinstance(x, dict) for x in _trace):
             # this is a list of points
             samples = len(_trace)
         else:
@@ -1833,7 +1833,7 @@ def sample_posterior_predictive_w(
     obs = [x for m in models for x in m.observed_RVs]
     variables = np.repeat(obs, n)
 
-    lengths = list(set([np.atleast_1d(observed).shape for observed in obs]))
+    lengths = list({np.atleast_1d(observed).shape for observed in obs})
 
     if len(lengths) == 1:
         size = [None for i in variables]
@@ -2015,7 +2015,7 @@ def init_nuts(
     if init == "auto":
         init = "jitter+adapt_diag"
 
-    _log.info("Initializing NUTS using {}...".format(init))
+    _log.info(f"Initializing NUTS using {init}...")
 
     if random_seed is not None:
         random_seed = int(np.atleast_1d(random_seed)[0])
@@ -2129,7 +2129,7 @@ def init_nuts(
         cov = np.eye(model.ndim)
         potential = quadpotential.QuadPotentialFullAdapt(model.ndim, mean, cov, 10)
     else:
-        raise ValueError("Unknown initializer: {}.".format(init))
+        raise ValueError(f"Unknown initializer: {init}.")
 
     step = pm.NUTS(potential=potential, model=model, **kwargs)
 
diff --git a/pymc3/smc/sample_smc.py b/pymc3/smc/sample_smc.py
index 33b297c2269..31b6500d1b0 100644
--- a/pymc3/smc/sample_smc.py
+++ b/pymc3/smc/sample_smc.py
@@ -147,10 +147,8 @@ def sample_smc(
         cores = 1
 
     _log.info(
-        (
             f"Multiprocess sampling ({chains} chain{'s' if chains > 1 else ''} "
             f"in {cores} job{'s' if cores > 1 else ''})"
-        )
     )
 
     if random_seed == -1:
@@ -196,7 +194,7 @@ def sample_smc(
     else:
         results = []
         for i in range(chains):
-            results.append((sample_smc_int(*params, random_seed[i], i, _log)))
+            results.append(sample_smc_int(*params, random_seed[i], i, _log))
 
     traces, sim_data, log_marginal_likelihoods, betas, accept_ratios, nsteps = zip(*results)
     trace = MultiTrace(traces)
diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py
index b22fd094ff7..c55f12a365c 100644
--- a/pymc3/step_methods/hmc/quadpotential.py
+++ b/pymc3/step_methods/hmc/quadpotential.py
@@ -584,7 +584,7 @@ def update(self, sample, grad, tune):
 
     def raise_ok(self, vmap):
         if self._chol_error is not None:
-            raise ValueError("{0}".format(self._chol_error))
+            raise ValueError(str(self._chol_error))
 
 
 class _WeightedCovariance:
diff --git a/pymc3/tests/backend_fixtures.py b/pymc3/tests/backend_fixtures.py
index eb22172440e..18378212092 100644
--- a/pymc3/tests/backend_fixtures.py
+++ b/pymc3/tests/backend_fixtures.py
@@ -109,7 +109,7 @@ def test_bad_dtype(self):
             strace.setup(self.draws, self.chain, bad_vars)
         strace.setup(self.draws, self.chain, good_vars)
         if strace.supports_sampler_stats:
-            assert strace.stat_names == set(['a'])
+            assert strace.stat_names == {'a'}
         else:
             with pytest.raises((ValueError, TypeError)):
                 strace.setup(self.draws, self.chain, good_vars)
@@ -197,9 +197,9 @@ def setup_class(cls):
             point1 = {varname: cls.expected[1][varname][idx, ...]
                       for varname in varnames}
             if cls.sampler_vars is not None:
-                stats1 = [dict((key, val[idx]) for key, val in stats.items())
+                stats1 = [{key: val[idx] for key, val in stats.items()}
                           for stats in cls.expected_stats[0]]
-                stats2 = [dict((key, val[idx]) for key, val in stats.items())
+                stats2 = [{key: val[idx] for key, val in stats.items()}
                           for stats in cls.expected_stats[1]]
                 strace0.record(point=point0, sampler_stats=stats1)
                 strace1.record(point=point1, sampler_stats=stats2)
@@ -247,7 +247,7 @@ def record_point(self, val):
         point = {varname: np.tile(val, value.shape)
                  for varname, value in self.test_point.items()}
         if self.sampler_vars is not None:
-            stats = [dict((key, dtype(val)) for key, dtype in vars.items())
+            stats = [{key: dtype(val) for key, dtype in vars.items()}
                      for vars in self.sampler_vars]
             self.strace.record(point=point, sampler_stats=stats)
         else:
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index b4d4338f1e6..f7492208fdd 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -253,7 +253,7 @@ def test_broadcast_shape(self, size):
         def test_different_shapes_and_sample_sizes(self, shape):
             prefix = self.distribution.__name__
 
-            rv = self.get_random_variable(shape, name='%s_%s' % (prefix, shape))
+            rv = self.get_random_variable(shape, name=f'{prefix}_{shape}')
             for size in (None, 1, 5, (4, 5)):
                 if size is None:
                     s = []
diff --git a/pymc3/tests/test_missing.py b/pymc3/tests/test_missing.py
index 1b13b5f6412..cd5bc5076e7 100644
--- a/pymc3/tests/test_missing.py
+++ b/pymc3/tests/test_missing.py
@@ -32,7 +32,7 @@ def test_missing():
 
     with model:
         prior_trace = sample_prior_predictive()
-    assert set(['x', 'y']) <= set(prior_trace.keys())
+    assert {'x', 'y'} <= set(prior_trace.keys())
 
 
 def test_missing_pandas():
@@ -49,7 +49,7 @@ def test_missing_pandas():
 
     with model:
         prior_trace = sample_prior_predictive()
-    assert set(['x', 'y']) <= set(prior_trace.keys())
+    assert {'x', 'y'} <= set(prior_trace.keys())
 
 def test_missing_with_predictors():
     predictors = array([0.5, 1, 0.5, 2, 0.3])
@@ -66,7 +66,7 @@ def test_missing_with_predictors():
 
     with model:
         prior_trace = sample_prior_predictive()
-    assert set(['x', 'y']) <= set(prior_trace.keys())
+    assert {'x', 'y'} <= set(prior_trace.keys())
 
 
 def test_missing_dual_observations():
@@ -82,7 +82,7 @@ def test_missing_dual_observations():
             ovar2 = Normal('o2', mu=beta2 * latent, observed=obs2)
 
         prior_trace = sample_prior_predictive()
-        assert set(['beta1', 'beta2', 'theta', 'o1', 'o2']) <= set(prior_trace.keys())
+        assert {'beta1', 'beta2', 'theta', 'o1', 'o2'} <= set(prior_trace.keys())
         sample()
 
 def test_internal_missing_observations():
@@ -95,5 +95,5 @@ def test_internal_missing_observations():
             theta2 = Normal('theta2', mu=theta1, observed=obs2)
 
         prior_trace = sample_prior_predictive()
-        assert set(['theta1', 'theta2']) <= set(prior_trace.keys())
+        assert {'theta1', 'theta2'} <= set(prior_trace.keys())
         sample()
diff --git a/pymc3/tests/test_ode.py b/pymc3/tests/test_ode.py
index 7ef1b6234c8..10b385592f2 100644
--- a/pymc3/tests/test_ode.py
+++ b/pymc3/tests/test_ode.py
@@ -89,7 +89,7 @@ def ode_func(y, t, p):
     np.testing.assert_allclose(y, simulated_y, rtol=1e-5)
 
 
-class TestSensitivityInitialCondition(object):
+class TestSensitivityInitialCondition:
 
     t = np.arange(0, 12, 0.25).reshape(-1, 1)
 
@@ -230,7 +230,7 @@ def system_1(y, t, p):
     np.testing.assert_allclose(manual_logp, pymc3_logp)
 
 
-class TestErrors(object):
+class TestErrors:
     """Test running model for a scalar ODE with 1 parameter"""
 
     def system(y, t, p):
@@ -281,7 +281,7 @@ def test_number_of_params(self):
             )
 
 
-class TestDiffEqModel(object):
+class TestDiffEqModel:
     def test_op_equality(self):
         """Tests that the equality of mathematically identical Ops evaluates True"""
 
diff --git a/pymc3/tests/test_posterior_predictive.py b/pymc3/tests/test_posterior_predictive.py
index 460a2ba4c52..f2f1571b80c 100644
--- a/pymc3/tests/test_posterior_predictive.py
+++ b/pymc3/tests/test_posterior_predictive.py
@@ -10,7 +10,7 @@ def test_translate_point_list():
         a = pm.Normal("a", mu=mu, sigma=1, observed=0.0)
         mt = point_list_to_multitrace([model.test_point], model)
         assert isinstance(mt, pm.backends.base.MultiTrace)
-        assert set(["mu"]) == set(mt.varnames)
+        assert {"mu"} == set(mt.varnames)
         assert len(mt) == 1
 
 def test_build_TraceDict():
@@ -22,7 +22,7 @@ def test_build_TraceDict():
       assert isinstance(dict, _TraceDict)
       assert len(dict) == 1000
       np.testing.assert_array_equal(trace['mu'], dict['mu'])
-      assert set(trace.varnames) == set(dict.varnames) == set(["mu"])
+      assert set(trace.varnames) == set(dict.varnames) == {"mu"}
 
 
 def test_build_TraceDict_point_list():
@@ -30,7 +30,7 @@ def test_build_TraceDict_point_list():
       mu = pm.Normal("mu", 0.0, 1.0)
       a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
       dict = _TraceDict(point_list=[model.test_point])
-      assert set(dict.varnames) == set(["mu"])
+      assert set(dict.varnames) == {"mu"}
       assert len(dict) == 1
       assert len(dict["mu"]) == 1
       assert dict["mu"][0] == 0.0
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 6c6baf57795..a5574229417 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -20,7 +20,7 @@
 try:
     import unittest.mock as mock  # py3
 except ImportError:
-    import mock
+    from unittest import mock
 
 import numpy.testing as npt
 import arviz as az
diff --git a/pymc3/tests/test_shape_handling.py b/pymc3/tests/test_shape_handling.py
index 680a7804b52..5456f0a1cc7 100644
--- a/pymc3/tests/test_shape_handling.py
+++ b/pymc3/tests/test_shape_handling.py
@@ -160,7 +160,7 @@ def test_broadcast_dist_samples_shape(self, fixture_sizes, fixture_shapes):
         except ValueError:
             expected_out = None
         if expected_out is not None and any(
-            (s[: min([len(size_), len(s)])] == size_ for s in shapes)
+            s[: min([len(size_), len(s)])] == size_ for s in shapes
         ):
             expected_out = size_ + expected_out
         if expected_out is None:
@@ -176,7 +176,7 @@ def test_broadcast_distribution_samples(self, samples_to_broadcast):
         size, samples, broadcast_shape = samples_to_broadcast
         if broadcast_shape is not None:
             outs = broadcast_distribution_samples(samples, size=size)
-            assert all((o.shape == broadcast_shape for o in outs))
+            assert all(o.shape == broadcast_shape for o in outs)
         else:
             with pytest.raises(ValueError):
                 broadcast_distribution_samples(samples, size=size)
@@ -209,7 +209,7 @@ def test_broadcast_dist_samples_to(self, samples_to_broadcast_to):
         to_shape, size, samples, broadcast_shape = samples_to_broadcast_to
         if broadcast_shape is not None:
             outs = broadcast_dist_samples_to(to_shape, samples, size=size)
-            assert all((o.shape == broadcast_shape for o in outs))
+            assert all(o.shape == broadcast_shape for o in outs)
         else:
             with pytest.raises(ValueError):
                 broadcast_dist_samples_to(to_shape, samples, size=size)
diff --git a/pymc3/tests/test_theanof.py b/pymc3/tests/test_theanof.py
index 3c029d6c547..94f2508841a 100644
--- a/pymc3/tests/test_theanof.py
+++ b/pymc3/tests/test_theanof.py
@@ -68,7 +68,7 @@ def np_take_along_axis(arr, indices, axis):
                 _axis = axis
             if _axis < 0 or _axis >= arr.ndim:
                 raise ValueError(
-                    "Supplied axis {} is out of bounds".format(axis)
+                    f"Supplied axis {axis} is out of bounds"
                 )
             return arr[_make_along_axis_idx(arr.shape, indices, _axis)]
 
diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py
index 5b8cddb393b..97b3e43e6f4 100644
--- a/pymc3/tests/test_variational_inference.py
+++ b/pymc3/tests/test_variational_inference.py
@@ -130,7 +130,7 @@ def test_init_groups(three_var_model, raises, grouping):
             if g is None:
                 pass
             else:
-                assert set(pm.util.get_transformed(z) for z in g) == set(ig.group)
+                assert {pm.util.get_transformed(z) for z in g} == set(ig.group)
         else:
             assert approx.ndim == three_var_model.ndim
 
@@ -143,7 +143,7 @@ def test_init_groups(three_var_model, raises, grouping):
         ({}, {MeanFieldGroup: (['one'], {}), FullRankGroup: (['two', 'three'], {})}),
         ({}, {MeanFieldGroup: (['one'], {}), EmpiricalGroup: (['two', 'three'], {'size': 100})})
 ],
-    ids=lambda t: ', '.join('%s: %s' % (k.__name__, v[0]) for k, v in t[1].items())
+    ids=lambda t: ', '.join('{}: {}'.format(k.__name__, v[0]) for k, v in t[1].items())
 )
 def three_var_groups(request, three_var_model):
     kw, grouping = request.param
diff --git a/pymc3/theanof.py b/pymc3/theanof.py
index 03350b4515d..080115c680c 100644
--- a/pymc3/theanof.py
+++ b/pymc3/theanof.py
@@ -490,9 +490,9 @@ def ix_(*args):
 
 
 def largest_common_dtype(tensors):
-    dtypes = set(str(t.dtype) if hasattr(t, 'dtype')
+    dtypes = {str(t.dtype) if hasattr(t, 'dtype')
                  else smartfloatX(np.asarray(t)).dtype
-                 for t in tensors)
+                 for t in tensors}
     return np.stack([np.ones((), dtype=dtype) for dtype in dtypes]).dtype
 
 
diff --git a/pymc3/util.py b/pymc3/util.py
index 2b9bfa440be..4f3cdd88cb4 100644
--- a/pymc3/util.py
+++ b/pymc3/util.py
@@ -66,7 +66,7 @@ def get_transformed_name(name, transform):
     str
         A string to use for the transformed variable
     """
-    return "{}_{}__".format(name, transform.name)
+    return f"{name}_{transform.name}__"
 
 
 def is_transformed_name(name):
@@ -101,7 +101,7 @@ def get_untransformed_name(name):
         String with untransformed version of the name.
     """
     if not is_transformed_name(name):
-        raise ValueError("{} does not appear to be a transformed name".format(name))
+        raise ValueError(f"{name} does not appear to be a transformed name")
     return "_".join(name.split("_")[:-3])
 
 
@@ -150,7 +150,7 @@ def get_repr_for_variable(variable, formatting="plain"):
         return "array"
 
     if formatting == "latex":
-        return r"\text{{{name}}}".format(name=name)
+        return fr"\text{{{name}}}"
     else:
         return name
 
diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py
index de34fb95eac..3f3293d68d6 100644
--- a/pymc3/variational/approximations.py
+++ b/pymc3/variational/approximations.py
@@ -340,7 +340,7 @@ def __str__(self):
             shp = ', '.join(map(str, self.histogram.shape.eval()))
         else:
             shp = 'None, ' + str(self.ddim)
-        return '{cls}[{shp}]'.format(shp=shp, cls=self.__class__.__name__)
+        return f'{self.__class__.__name__}[{shp}]'
 
 
 class NormalizingFlowGroup(Group):
diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py
index 457bba1c372..b7ca96cdc5e 100644
--- a/pymc3/variational/inference.py
+++ b/pymc3/variational/inference.py
@@ -241,10 +241,10 @@ def _infmean(input_array):
                 if i % 10 == 0:
                     avg_loss = _infmean(scores[max(0, i - 1000): i + 1])
                     if hasattr(progress, 'comment'):
-                        progress.comment = "Average Loss = {:,.5g}".format(avg_loss)
+                        progress.comment = f"Average Loss = {avg_loss:,.5g}"
                     avg_loss = scores[max(0, i - 1000): i + 1].mean()
                     if hasattr(progress, 'comment'):
-                        progress.comment = "Average Loss = {:,.5g}".format(avg_loss)
+                        progress.comment = f"Average Loss = {avg_loss:,.5g}"
                 for callback in callbacks:
                     callback(self.approx, scores[: i + 1], i + s + 1)
         except (KeyboardInterrupt, StopIteration) as e:  # pragma: no cover
@@ -270,7 +270,7 @@ def _infmean(input_array):
                 logger.info("Finished [100%]: Loss = {:,.5g}".format(scores[-1]))
             else:
                 avg_loss = _infmean(scores[max(0, i - 1000): i + 1])
-                logger.info("Finished [100%]: Average Loss = {:,.5g}".format(avg_loss))
+                logger.info(f"Finished [100%]: Average Loss = {avg_loss:,.5g}")
         self.hist = np.concatenate([self.hist, scores])
         return State(i + s, step=step_func, callbacks=callbacks, score=True)
 
diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py
index 6679fe73551..5d512afdfac 100644
--- a/pymc3/variational/opvi.py
+++ b/pymc3/variational/opvi.py
@@ -156,7 +156,7 @@ class ObjectiveUpdates(theano.OrderedUpdates):
 
 
 def _warn_not_used(smth, where):
-    warnings.warn('`%s` is not used for %s and ignored' % (smth, where))
+    warnings.warn(f'`{smth}` is not used for {where} and ignored')
 
 
 class ObjectiveFunction:
@@ -1207,7 +1207,7 @@ def __str__(self):
                 shp = 'None, ' + shp
             elif self.batched:
                 shp = str(self.bdim) + ', ' + shp
-        return '{cls}[{shp}]'.format(shp=shp, cls=self.__class__.__name__)
+        return f'{self.__class__.__name__}[{shp}]'
 
     @node_property
     def std(self):
@@ -1580,7 +1580,7 @@ def sample_dict_fn(self):
 
         def inner(draws=100):
             _samples = sample_fn(draws)
-            return dict([(v_.name, s_) for v_, s_ in zip(self.model.free_RVs, _samples)])
+            return {v_.name: s_ for v_, s_ in zip(self.model.free_RVs, _samples)}
 
         return inner
 
diff --git a/pymc3/vartypes.py b/pymc3/vartypes.py
index d43ff5d52bd..08fda9629fb 100644
--- a/pymc3/vartypes.py
+++ b/pymc3/vartypes.py
@@ -28,13 +28,13 @@
     "theano_constant",
 ]
 
-bool_types = set(["int8"])
+bool_types = {"int8"}
 
-int_types = set(
-    ["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"]
-)
-float_types = set(["float32", "float64"])
-complex_types = set(["complex64", "complex128"])
+int_types = {
+    "int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"
+}
+float_types = {"float32", "float64"}
+complex_types = {"complex64", "complex128"}
 continuous_types = float_types | complex_types
 discrete_types = bool_types | int_types
 
diff --git a/setup.py b/setup.py
index b7cda72df32..9d5071f64b7 100755
--- a/setup.py
+++ b/setup.py
@@ -55,13 +55,13 @@
 
 def get_version():
     VERSIONFILE = join("pymc3", "__init__.py")
-    lines = open(VERSIONFILE, "rt").readlines()
+    lines = open(VERSIONFILE).readlines()
     version_regex = r"^__version__ = ['\"]([^'\"]*)['\"]"
     for line in lines:
         mo = re.search(version_regex, line, re.M)
         if mo:
             return mo.group(1)
-    raise RuntimeError("Unable to find version in %s." % (VERSIONFILE,))
+    raise RuntimeError(f"Unable to find version in {VERSIONFILE}.")
 
 
 if __name__ == "__main__":