diff --git a/lib/iris/tests/__init__.py b/lib/iris/tests/__init__.py
index c1df4f628b..cfcfeb65e8 100644
--- a/lib/iris/tests/__init__.py
+++ b/lib/iris/tests/__init__.py
@@ -11,15 +11,8 @@
 
 The primary class for this module is :class:`IrisTest`.
 
-By default, this module sets the matplotlib backend to "agg". But when
-this module is imported it checks ``sys.argv`` for the flag "-d". If
-found, it is removed from ``sys.argv`` and the matplotlib backend is
-switched to "tkagg" to allow the interactive visual inspection of
-graphical test results.
-
 """
 
-import codecs
 import collections
 from collections.abc import Mapping
 import contextlib
@@ -38,40 +31,23 @@
 import shutil
 import subprocess
 import sys
-import threading
-from typing import Dict, List
 import unittest
 from unittest import mock
 import warnings
 import xml.dom.minidom
 import zlib
 
-import filelock
 import numpy as np
 import numpy.ma as ma
 import requests
 
 import iris.config
 import iris.cube
+import iris.tests.graphics as graphics
 import iris.util
 
-# Test for availability of matplotlib.
-# (And remove matplotlib as an iris.tests dependency.)
-try:
-    import matplotlib
-
-    # Override any user settings e.g. from matplotlibrc file.
-    matplotlib.rcdefaults()
-    # Set backend *after* rcdefaults, as we don't want that overridden (#3846).
-    matplotlib.use("agg")
-    # Standardise the figure size across matplotlib versions.
-    # This permits matplotlib png image comparison.
-    matplotlib.rcParams["figure.figsize"] = [8.0, 6.0]
-    import matplotlib.pyplot as plt
-except ImportError:
-    MPL_AVAILABLE = False
-else:
-    MPL_AVAILABLE = True
+MPL_AVAILABLE = graphics.MPL_AVAILABLE
+
 
 try:
     from osgeo import gdal  # noqa
@@ -111,10 +87,6 @@
 
 #: Basepath for test results.
 _RESULT_PATH = os.path.join(os.path.dirname(__file__), "results")
-#: Default perceptual hash size.
-_HASH_SIZE = 16
-#: Default maximum perceptual hash hamming distance.
-_HAMMING_DISTANCE = 2
 
 if "--data-files-used" in sys.argv:
     sys.argv.remove("--data-files-used")
@@ -131,18 +103,6 @@
     os.environ["IRIS_TEST_CREATE_MISSING"] = "true"
 
 
-# Whether to display matplotlib output to the screen.
-_DISPLAY_FIGURES = False
-
-if MPL_AVAILABLE and "-d" in sys.argv:
-    sys.argv.remove("-d")
-    plt.switch_backend("tkagg")
-    _DISPLAY_FIGURES = True
-
-# Threading non re-entrant blocking lock to ensure thread-safe plotting.
-_lock = threading.Lock()
-
-
 def main():
     """A wrapper for unittest.main() which adds iris.test specific options to the help (-h) output."""
     if "-h" in sys.argv or "--help" in sys.argv:
@@ -179,43 +139,6 @@ def main():
         unittest.main()
 
 
-def get_data_path(relative_path):
-    """
-    Return the absolute path to a data file when given the relative path
-    as a string, or sequence of strings.
-
-    """
-    if not isinstance(relative_path, str):
-        relative_path = os.path.join(*relative_path)
-    test_data_dir = iris.config.TEST_DATA_DIR
-    if test_data_dir is None:
-        test_data_dir = ""
-    data_path = os.path.join(test_data_dir, relative_path)
-
-    if _EXPORT_DATAPATHS_FILE is not None:
-        _EXPORT_DATAPATHS_FILE.write(data_path + "\n")
-
-    if isinstance(data_path, str) and not os.path.exists(data_path):
-        # if the file is gzipped, ungzip it and return the path of the ungzipped
-        # file.
-        gzipped_fname = data_path + ".gz"
-        if os.path.exists(gzipped_fname):
-            with gzip.open(gzipped_fname, "rb") as gz_fh:
-                try:
-                    with open(data_path, "wb") as fh:
-                        fh.writelines(gz_fh)
-                except IOError:
-                    # Put ungzipped data file in a temporary path, since we
-                    # can't write to the original path (maybe it is owned by
-                    # the system.)
-                    _, ext = os.path.splitext(data_path)
-                    data_path = iris.util.create_temp_filename(suffix=ext)
-                    with open(data_path, "wb") as fh:
-                        fh.writelines(gz_fh)
-
-    return data_path
-
-
 class IrisTest_nometa(unittest.TestCase):
     """A subclass of unittest.TestCase which provides Iris specific testing functionality."""
 
@@ -250,6 +173,43 @@ def _assert_str_same(
                 % (type_comparison_name, reference_filename, diff)
             )
 
+    @staticmethod
+    def get_data_path(relative_path):
+        """
+        Return the absolute path to a data file when given the relative path
+        as a string, or sequence of strings.
+
+        """
+        if not isinstance(relative_path, str):
+            relative_path = os.path.join(*relative_path)
+        test_data_dir = iris.config.TEST_DATA_DIR
+        if test_data_dir is None:
+            test_data_dir = ""
+        data_path = os.path.join(test_data_dir, relative_path)
+
+        if _EXPORT_DATAPATHS_FILE is not None:
+            _EXPORT_DATAPATHS_FILE.write(data_path + "\n")
+
+        if isinstance(data_path, str) and not os.path.exists(data_path):
+            # if the file is gzipped, ungzip it and return the path of the ungzipped
+            # file.
+            gzipped_fname = data_path + ".gz"
+            if os.path.exists(gzipped_fname):
+                with gzip.open(gzipped_fname, "rb") as gz_fh:
+                    try:
+                        with open(data_path, "wb") as fh:
+                            fh.writelines(gz_fh)
+                    except IOError:
+                        # Put ungzipped data file in a temporary path, since we
+                        # can't write to the original path (maybe it is owned by
+                        # the system.)
+                        _, ext = os.path.splitext(data_path)
+                        data_path = iris.util.create_temp_filename(suffix=ext)
+                        with open(data_path, "wb") as fh:
+                            fh.writelines(gz_fh)
+
+        return data_path
+
     @staticmethod
     def get_result_path(relative_path):
         """
@@ -872,137 +832,7 @@ def check_graphic(self):
         output directory, and the imagerepo.json file being updated.
 
         """
-        from PIL import Image
-        import imagehash
-
-        dev_mode = os.environ.get("IRIS_TEST_CREATE_MISSING")
-        unique_id = self._unique_id()
-        repo_fname = os.path.join(_RESULT_PATH, "imagerepo.json")
-        with open(repo_fname, "rb") as fi:
-            repo: Dict[str, List[str]] = json.load(
-                codecs.getreader("utf-8")(fi)
-            )
-
-        try:
-            #: The path where the images generated by the tests should go.
-            image_output_directory = os.path.join(
-                os.path.dirname(__file__), "result_image_comparison"
-            )
-            if not os.access(image_output_directory, os.W_OK):
-                if not os.access(os.getcwd(), os.W_OK):
-                    raise IOError(
-                        "Write access to a local disk is required "
-                        "to run image tests.  Run the tests from a "
-                        "current working directory you have write "
-                        "access to to avoid this issue."
-                    )
-                else:
-                    image_output_directory = os.path.join(
-                        os.getcwd(), "iris_image_test_output"
-                    )
-            result_fname = os.path.join(
-                image_output_directory, "result-" + unique_id + ".png"
-            )
-
-            if not os.path.isdir(image_output_directory):
-                # Handle race-condition where the directories are
-                # created sometime between the check above and the
-                # creation attempt below.
-                try:
-                    os.makedirs(image_output_directory)
-                except OSError as err:
-                    # Don't care about "File exists"
-                    if err.errno != 17:
-                        raise
-
-            def _create_missing():
-                fname = "{}.png".format(phash)
-                base_uri = (
-                    "https://scitools.github.io/test-iris-imagehash/"
-                    "images/v4/{}"
-                )
-                uri = base_uri.format(fname)
-                hash_fname = os.path.join(image_output_directory, fname)
-                uris = repo.setdefault(unique_id, [])
-                uris.append(uri)
-                print("Creating image file: {}".format(hash_fname))
-                figure.savefig(hash_fname)
-                msg = "Creating imagerepo entry: {} -> {}"
-                print(msg.format(unique_id, uri))
-                lock = filelock.FileLock(
-                    os.path.join(_RESULT_PATH, "imagerepo.lock")
-                )
-                # The imagerepo.json file is a critical resource, so ensure
-                # thread safe read/write behaviour via platform independent
-                # file locking.
-                with lock.acquire(timeout=600):
-                    with open(repo_fname, "wb") as fo:
-                        json.dump(
-                            repo,
-                            codecs.getwriter("utf-8")(fo),
-                            indent=4,
-                            sort_keys=True,
-                        )
-
-            # Calculate the test result perceptual image hash.
-            buffer = io.BytesIO()
-            figure = plt.gcf()
-            figure.savefig(buffer, format="png")
-            buffer.seek(0)
-            phash = imagehash.phash(Image.open(buffer), hash_size=_HASH_SIZE)
-
-            if unique_id not in repo:
-                # The unique id might not be fully qualified, e.g.
-                # expects iris.tests.test_quickplot.TestLabels.test_contour.0,
-                # but got test_quickplot.TestLabels.test_contour.0
-                # if we find single partial match from end of the key
-                # then use that, else fall back to the unknown id state.
-                matches = [key for key in repo if key.endswith(unique_id)]
-                if len(matches) == 1:
-                    unique_id = matches[0]
-
-            if unique_id in repo:
-                uris = repo[unique_id]
-                # Extract the hex basename strings from the uris.
-                hexes = [
-                    os.path.splitext(os.path.basename(uri))[0] for uri in uris
-                ]
-                # Create the expected perceptual image hashes from the uris.
-                to_hash = imagehash.hex_to_hash
-                expected = [to_hash(uri_hex) for uri_hex in hexes]
-
-                # Calculate hamming distance vector for the result hash.
-                distances = [e - phash for e in expected]
-
-                if np.all([hd > _HAMMING_DISTANCE for hd in distances]):
-                    if dev_mode:
-                        _create_missing()
-                    else:
-                        figure.savefig(result_fname)
-                        msg = (
-                            "Bad phash {} with hamming distance {} "
-                            "for test {}."
-                        )
-                        msg = msg.format(phash, distances, unique_id)
-                        if _DISPLAY_FIGURES:
-                            emsg = "Image comparison would have failed: {}"
-                            print(emsg.format(msg))
-                        else:
-                            emsg = "Image comparison failed: {}"
-                            raise AssertionError(emsg.format(msg))
-            else:
-                if dev_mode:
-                    _create_missing()
-                else:
-                    figure.savefig(result_fname)
-                    emsg = "Missing image test result: {}."
-                    raise AssertionError(emsg.format(unique_id))
-
-            if _DISPLAY_FIGURES:
-                plt.show()
-
-        finally:
-            plt.close()
+        graphics.check_graphic(self)
 
     def _remove_testcase_patches(self):
         """Helper to remove per-testcase patches installed by :meth:`patch`."""
@@ -1214,37 +1044,15 @@ class IrisTest(IrisTest_nometa, metaclass=_TestTimingsMetaclass):
     pass
 
 
+get_data_path = IrisTest.get_data_path
 get_result_path = IrisTest.get_result_path
 
 
-class GraphicsTestMixin:
-
-    # nose directive: dispatch tests concurrently.
-    _multiprocess_can_split_ = True
-
-    def setUp(self):
-        # Acquire threading non re-entrant blocking lock to ensure
-        # thread-safe plotting.
-        _lock.acquire()
-        # Make sure we have no unclosed plots from previous tests before
-        # generating this one.
-        if MPL_AVAILABLE:
-            plt.close("all")
-
-    def tearDown(self):
-        # If a plotting test bombs out it can leave the current figure
-        # in an odd state, so we make sure it's been disposed of.
-        if MPL_AVAILABLE:
-            plt.close("all")
-        # Release the non re-entrant blocking lock.
-        _lock.release()
-
-
-class GraphicsTest(GraphicsTestMixin, IrisTest):
+class GraphicsTest(graphics.GraphicsTestMixin, IrisTest):
     pass
 
 
-class GraphicsTest_nometa(GraphicsTestMixin, IrisTest_nometa):
+class GraphicsTest_nometa(graphics.GraphicsTestMixin, IrisTest_nometa):
     # Graphicstest without the metaclass providing test timings.
     pass
 
@@ -1290,23 +1098,7 @@ class MyGeoTiffTests(test.IrisTest):
     return skip(fn)
 
 
-def skip_plot(fn):
-    """
-    Decorator to choose whether to run tests, based on the availability of the
-    matplotlib library.
-
-    Example usage:
-        @skip_plot
-        class MyPlotTests(test.GraphicsTest):
-            ...
-
-    """
-    skip = unittest.skipIf(
-        condition=not MPL_AVAILABLE,
-        reason="Graphics tests require the matplotlib library.",
-    )
-
-    return skip(fn)
+skip_plot = graphics.skip_plot
 
 
 skip_sample_data = unittest.skipIf(
diff --git a/lib/iris/tests/graphics/__init__.py b/lib/iris/tests/graphics/__init__.py
new file mode 100755
index 0000000000..13b2bc58a7
--- /dev/null
+++ b/lib/iris/tests/graphics/__init__.py
@@ -0,0 +1,284 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+# !/usr/bin/env python
+"""
+Contains Iris graphic testing utilities
+
+By default, this module sets the matplotlib backend to "agg". But when
+this module is imported it checks ``sys.argv`` for the flag "-d". If
+found, it is removed from ``sys.argv`` and the matplotlib backend is
+switched to "tkagg" to allow the interactive visual inspection of
+graphical test results.
+"""
+
+from collections import defaultdict
+import io
+import os
+from pathlib import Path
+import re
+import sys
+import threading
+import unittest
+
+import numpy as np
+
+# Test for availability of matplotlib.
+# (And remove matplotlib as an iris.tests dependency.)
+try:
+    import matplotlib
+
+    # Override any user settings e.g. from matplotlibrc file.
+    matplotlib.rcdefaults()
+    # Set backend *after* rcdefaults, as we don't want that overridden (#3846).
+    matplotlib.use("agg")
+    # Standardise the figure size across matplotlib versions.
+    # This permits matplotlib png image comparison.
+    matplotlib.rcParams["figure.figsize"] = [8.0, 6.0]
+    import matplotlib.pyplot as plt
+except ImportError:
+    MPL_AVAILABLE = False
+else:
+    MPL_AVAILABLE = True
+
+# Whether to display matplotlib output to the screen.
+_DISPLAY_FIGURES = False
+
+if MPL_AVAILABLE and "-d" in sys.argv:
+    sys.argv.remove("-d")
+    plt.switch_backend("tkagg")
+    _DISPLAY_FIGURES = True
+
+#: Default perceptual hash size.
+_HASH_SIZE = 16
+#: Default maximum perceptual hash hamming distance.
+_HAMMING_DISTANCE = 2
+# Prefix for image test results (that aren't yet verified as good to add to
+# reference images). The name pattern requires a literal ".png" suffix.
+_RESULT_PREFIX = "result-"
+_RESULT_NAME_PATTERN = re.compile(_RESULT_PREFIX + r"(.*)\.png$")
+
+
+def _results_dir():
+    test_results_dir = Path(__file__).parents[1] / Path(
+        "result_image_comparison"
+    )
+
+    if not os.access(test_results_dir, os.W_OK):
+        if not os.access(Path("."), os.W_OK):
+            raise IOError(
+                "Write access to a local disk is required "
+                "to run image tests. Run the tests from a "
+                "current working directory you have write "
+                "access to to avoid this issue."
+            )
+        else:
+            test_results_dir = Path(".") / Path("iris_image_test_output")
+
+    return test_results_dir
+
+
+_IMAGE_NAME_PATTERN = re.compile(r"(.*)_([0-9]+)\.png$")
+
+
+def _get_reference_image_lookup(reference_image_dir):
+    """Map test names to index-ordered lists of reference image paths."""
+    tmp_storage = defaultdict(dict)
+    reference_image_dir = Path(reference_image_dir)
+    for reference_image_path in reference_image_dir.iterdir():
+        name_match = _IMAGE_NAME_PATTERN.match(reference_image_path.name)
+        if name_match:
+            test_name = name_match.group(1)
+            image_index = int(name_match.group(2))
+            tmp_storage[test_name][image_index] = reference_image_path
+        else:
+            emsg = f"Incorrectly named image in reference dir: {reference_image_path}"
+            raise ValueError(emsg)
+
+    reference_image_lookup = {}
+
+    for test_name, index_dict in tmp_storage.items():
+        # Indices must run contiguously from 0; a gap leaves a None entry.
+        path_list = [
+            index_dict.get(ind) for ind in range(max(index_dict) + 1)
+        ]
+        # Explicit check rather than assert, which is stripped under -O.
+        if None in path_list:
+            emsg = f"Reference images for {test_name} numbered incorrectly"
+            raise ValueError(emsg)
+        reference_image_lookup[test_name] = path_list
+
+    return reference_image_lookup
+
+
+def _next_reference_image_name(reference_image_lookup, test_id):
+    try:
+        image_index = len(reference_image_lookup[test_id])
+    except KeyError:
+        image_index = 0
+    fname = Path(f"{test_id}_{image_index}.png")
+    return fname
+
+
+def extract_test_key(result_image_name):
+    """
+    Extracts the name of the test which a result image refers to
+    """
+    name_match = _RESULT_NAME_PATTERN.match(str(result_image_name))
+    if name_match:
+        test_key = name_match.group(1)
+    else:
+        emsg = f"Incorrectly named image in result dir: {result_image_name}"
+        raise ValueError(emsg)
+    return test_key
+
+
+def check_graphic(test_obj):
+    """
+    Check the hash of the current matplotlib figure matches the expected
+    image hash for the current graphic test.
+
+    To create missing image test results, set the IRIS_TEST_CREATE_MISSING
+    environment variable before running the tests. This will result in new
+    and appropriately named "<test_id>_<index>.png" image files being
+    generated in the image output directory.
+
+    """
+
+    from PIL import Image
+    import imagehash
+
+    reference_image_lookup = _get_reference_image_lookup(
+        test_obj.get_data_path("images")
+    )
+
+    test_id = test_obj._unique_id()
+
+    dev_mode = os.environ.get("IRIS_TEST_CREATE_MISSING")
+
+    try:
+        #: The path where the images generated by the tests should go.
+        test_results_dir = _results_dir()
+
+        test_results_dir.mkdir(exist_ok=True)
+
+        result_path = test_results_dir / Path(f"{_RESULT_PREFIX}{test_id}.png")
+
+        # Check if test_id is fully qualified, if it's not then try to work
+        # out what it should be
+        if test_id not in reference_image_lookup:
+
+            test_id_candidates = [
+                x for x in reference_image_lookup.keys() if x.endswith(test_id)
+            ]
+
+            if len(test_id_candidates) == 1:
+                (test_id,) = test_id_candidates
+
+        def _create_missing():
+            # Use the next free index so existing reference names are kept.
+            fname = _next_reference_image_name(reference_image_lookup, test_id)
+
+            output_path = test_results_dir / fname
+
+            print(f"Creating image file: {output_path}")
+            figure.savefig(output_path)
+
+        # Calculate the test result perceptual image hash.
+        buffer = io.BytesIO()
+        figure = plt.gcf()
+        figure.savefig(buffer, format="png")
+        buffer.seek(0)
+        phash = imagehash.phash(Image.open(buffer), hash_size=_HASH_SIZE)
+
+        # No entry means no reference images yet: take the "missing" branch.
+        reference_image_names = reference_image_lookup.get(test_id, [])
+        if reference_image_names:
+            # Lookup values are already full paths to the reference images.
+            expected = [
+                imagehash.phash(
+                    Image.open(image_path),
+                    hash_size=_HASH_SIZE,
+                )
+                for image_path in reference_image_names
+            ]
+
+            # Calculate hamming distance vector for the result hash.
+            distances = [e - phash for e in expected]
+
+            if np.all([hd > _HAMMING_DISTANCE for hd in distances]):
+                if dev_mode:
+                    _create_missing()
+                else:
+                    figure.savefig(result_path)
+                    msg = (
+                        "Bad phash {} with hamming distance {} " "for test {}."
+                    )
+                    msg = msg.format(phash, distances, test_id)
+                    if _DISPLAY_FIGURES:
+                        emsg = "Image comparison would have failed: {}"
+                        print(emsg.format(msg))
+                    else:
+                        emsg = "Image comparison failed: {}"
+                        raise AssertionError(emsg.format(msg))
+        else:
+            if dev_mode:
+                _create_missing()
+            else:
+                figure.savefig(result_path)
+                emsg = "Missing image test result: {}."
+                raise AssertionError(emsg.format(test_id))
+
+        if _DISPLAY_FIGURES:
+            plt.show()
+
+    finally:
+        plt.close()
+
+
+# Threading non re-entrant blocking lock to ensure thread-safe plotting.
+_lock = threading.Lock()
+
+
+class GraphicsTestMixin:
+
+    # nose directive: dispatch tests concurrently.
+    _multiprocess_can_split_ = True
+
+    def setUp(self):
+        # Acquire threading non re-entrant blocking lock to ensure
+        # thread-safe plotting.
+        _lock.acquire()
+        # Make sure we have no unclosed plots from previous tests before
+        # generating this one.
+        if MPL_AVAILABLE:
+            plt.close("all")
+
+    def tearDown(self):
+        # If a plotting test bombs out it can leave the current figure
+        # in an odd state, so we make sure it's been disposed of.
+        if MPL_AVAILABLE:
+            plt.close("all")
+        # Release the non re-entrant blocking lock.
+        _lock.release()
+
+
+def skip_plot(fn):
+    """
+    Decorator to choose whether to run tests, based on the availability of the
+    matplotlib library.
+
+    Example usage:
+        @skip_plot
+        class MyPlotTests(test.GraphicsTest):
+            ...
+
+    """
+    skip = unittest.skipIf(
+        condition=not MPL_AVAILABLE,
+        reason="Graphics tests require the matplotlib library.",
+    )
+
+    return skip(fn)
diff --git a/lib/iris/tests/graphics/idiff.py b/lib/iris/tests/graphics/idiff.py
new file mode 100755
index 0000000000..c53cf88e43
--- /dev/null
+++ b/lib/iris/tests/graphics/idiff.py
@@ -0,0 +1,265 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+# !/usr/bin/env python
+"""
+Provides "diff-like" comparison of images.
+
+Currently relies on matplotlib for image processing so limited to PNG format.
+
+"""
+
+import argparse
+import hashlib
+from pathlib import Path
+import sys
+import warnings
+
+# Force iris.tests to use the ```tkagg``` backend by using the '-d'
+# command-line argument as idiff is an interactive tool that requires a
+# gui interface.
+sys.argv.append("-d")
+from PIL import Image  # noqa
+import imagehash  # noqa
+import matplotlib.image as mimg  # noqa
+import matplotlib.pyplot as plt  # noqa
+import matplotlib.testing.compare as mcompare  # noqa
+from matplotlib.testing.exceptions import ImageComparisonFailure  # noqa
+import matplotlib.widgets as mwidget  # noqa
+import numpy as np  # noqa
+
+import iris.tests  # noqa
+import iris.tests.graphics as graphics
+
+_POSTFIX_DIFF = "-failed-diff.png"
+
+
+def hash_image(image_path):
+    """
+    Return the sha256 hex digest of the contents of the file at image_path.
+    """
+    return hashlib.sha256(Path(image_path).read_bytes()).hexdigest()
+
+
+def image_already_present(check_path, image_dir):
+    """
+    Check if an image is already in the given directory
+    """
+    check_hash = hash_image(check_path)
+    for dir_image_path in image_dir.iterdir():
+        if check_hash == hash_image(dir_image_path):
+            return True
+    return False
+
+
+def diff_viewer(
+    key,
+    status,
+    expected_path,
+    result_path,
+    diff_fname,
+):
+    fig = plt.figure(figsize=(14, 12))
+    plt.suptitle(expected_path.name)
+    ax = plt.subplot(221)
+    ax.imshow(mimg.imread(expected_path))
+    ax = plt.subplot(222, sharex=ax, sharey=ax)
+    ax.imshow(mimg.imread(result_path))
+    ax = plt.subplot(223, sharex=ax, sharey=ax)
+    ax.imshow(mimg.imread(diff_fname))
+
+    result_dir = result_path.parent
+    reference_image_lookup = graphics._get_reference_image_lookup(
+        expected_path.parent
+    )
+
+    def accept(event):
+        if not image_already_present(result_path, expected_path.parent):
+            # Maintain strict time order: the lowest-numbered reference
+            # image for a test key is the oldest, and the newly accepted
+            # image takes the next (highest) index, making it the youngest.
+            out_file = result_dir / graphics._next_reference_image_name(
+                reference_image_lookup, key
+            )
+            result_path.rename(out_file)
+            msg = f"ACCEPTED:  {result_path.name} -> {out_file.name}"
+            print(msg)
+        else:
+            msg = f"DUPLICATE: {result_path.name} -> {expected_path.name} (ignored)"
+            print(msg)
+            result_path.unlink()
+        diff_fname.unlink()
+        plt.close()
+
+    def reject(event):
+        if not image_already_present(result_path, expected_path.parent):
+            print(f"REJECTED:  {result_path.name}")
+        else:
+            msg = f"DUPLICATE: {result_path.name} -> {expected_path.name} (ignored)"
+            print(msg)
+        result_path.unlink()
+        diff_fname.unlink()
+        plt.close()
+
+    def skip(event):
+        # Let's keep both the result and the diff files.
+        print(f"SKIPPED:   {result_path.name}")
+        plt.close()
+
+    ax_accept = plt.axes([0.59, 0.05, 0.1, 0.075])
+    ax_reject = plt.axes([0.7, 0.05, 0.1, 0.075])
+    ax_skip = plt.axes([0.81, 0.05, 0.1, 0.075])
+    baccept = mwidget.Button(ax_accept, "Accept")
+    baccept.on_clicked(accept)
+    breject = mwidget.Button(ax_reject, "Reject")
+    breject.on_clicked(reject)
+    bskip = mwidget.Button(ax_skip, "Skip")
+    bskip.on_clicked(skip)
+    plt.text(0.59, 0.15, status, transform=fig.transFigure)
+    plt.show()
+
+
+def _calculate_hit(image_paths, phash, action):
+
+    expected = [
+        imagehash.phash(Image.open(image_path), hash_size=graphics._HASH_SIZE)
+        for image_path in image_paths
+    ]
+
+    # Calculate the hamming distance vector for the result hash.
+    distances = [e - phash for e in expected]
+
+    if action == "first":
+        index = 0
+    elif action == "last":
+        index = -1
+    elif action == "similar":
+        index = np.argmin(distances)
+    elif action == "different":
+        index = np.argmax(distances)
+    else:
+        emsg = "Unknown action: {!r}"
+        raise ValueError(emsg.format(action))
+
+    return index, distances[index]
+
+
+def step_over_diffs(result_dir, action, display=True):
+    processed = False
+
+    if action in ["first", "last"]:
+        kind = action
+    elif action in ["similar", "different"]:
+        kind = "most {}".format(action)
+    else:
+        emsg = "Unknown action: {!r}"
+        raise ValueError(emsg.format(action))
+    if display:
+        msg = (
+            "\nComparing the {!r} expected image with "
+            "the test result image."
+        )
+        print(msg.format(kind))
+
+    # Remove old image diff results.
+    for fname in result_dir.glob(f"*{_POSTFIX_DIFF}"):
+        fname.unlink()
+
+    # Filter out all non-test result image files.
+    results = []
+    for fname in sorted(result_dir.glob(f"{graphics._RESULT_PREFIX}*.png")):
+        # We only care about PNG images.
+        try:
+            im = Image.open(fname)
+            if im.format != "PNG":
+                # Ignore - it's not a png image.
+                continue
+        except IOError:
+            # Ignore - it's not an image.
+            continue
+        results.append(fname)
+
+    count = len(results)
+
+    reference_image_dir = Path(iris.tests.get_data_path("images"))
+
+    reference_images = graphics._get_reference_image_lookup(
+        reference_image_dir
+    )
+
+    for count_index, result_path in enumerate(results):
+        test_key = graphics.extract_test_key(result_path.name)
+
+        try:
+            # Calculate the test result perceptual image hash.
+            phash = imagehash.phash(
+                Image.open(result_path), hash_size=graphics._HASH_SIZE
+            )
+            relevant_image_paths = reference_images[test_key]
+            hash_index, distance = _calculate_hit(
+                relevant_image_paths, phash, action
+            )
+            relevant_image_path = reference_images[test_key][hash_index]
+        except KeyError:
+            wmsg = "Ignoring unregistered test result {!r}."
+            warnings.warn(wmsg.format(test_key))
+            continue
+
+        processed = True
+
+        try:
+            # Creates the diff file when the images aren't identical
+            mcompare.compare_images(relevant_image_path, result_path, tol=0)
+        except Exception as e:
+            if isinstance(e, ValueError) or isinstance(
+                e, ImageComparisonFailure
+            ):
+                print(f"Could not compare {result_path}: {e}")
+                continue
+            else:
+                # Propagate the exception, keeping the stack trace
+                raise
+        diff_path = result_dir / Path(f"{result_path.stem}{_POSTFIX_DIFF}")
+        args = relevant_image_path, result_path, diff_path
+        if display:
+            status = f"Image {count_index + 1} of {count}: hamming distance = {distance} [{kind}]"
+            prefix = test_key, status
+            yield prefix + args
+        else:
+            yield args
+    if display and not processed:
+        print("\nThere are no iris test result images to process.\n")
+
+
+if __name__ == "__main__":
+    default = Path(iris.tests.__file__).parent / Path(
+        "result_image_comparison"
+    )
+    description = "Iris graphic test difference tool."
+    formatter_class = argparse.RawTextHelpFormatter
+    parser = argparse.ArgumentParser(
+        description=description, formatter_class=formatter_class
+    )
+    help = "path to iris tests result image directory (default: %(default)s)"
+    parser.add_argument("--resultdir", "-r", default=default, help=help)
+    help = 'force "iris.tests" to use the tkagg backend (default: %(default)s)'
+    parser.add_argument("-d", action="store_true", default=True, help=help)
+    help = """
+first     - compare result image with first (oldest) expected image
+last      - compare result image with last (youngest) expected image
+similar   - compare result image with most similar expected image (default)
+different - compare result image with most unsimilar expected image
+"""
+    choices = ("first", "last", "similar", "different")
+    parser.add_argument(
+        "action", nargs="?", choices=choices, default="similar", help=help
+    )
+    args = parser.parse_args()
+    result_dir = Path(args.resultdir)
+    if not result_dir.is_dir():
+        emsg = f"Invalid results directory: {result_dir}"
+        raise ValueError(emsg)
+    for args in step_over_diffs(result_dir, args.action):
+        diff_viewer(*args)
diff --git a/lib/iris/tests/idiff.py b/lib/iris/tests/idiff.py
deleted file mode 100755
index 9770ca116f..0000000000
--- a/lib/iris/tests/idiff.py
+++ /dev/null
@@ -1,314 +0,0 @@
-# Copyright Iris contributors
-#
-# This file is part of Iris and is released under the LGPL license.
-# See COPYING and COPYING.LESSER in the root of the repository for full
-# licensing details.
-# !/usr/bin/env python
-"""
-Provides "diff-like" comparison of images.
-
-Currently relies on matplotlib for image processing so limited to PNG format.
-
-"""
-
-import argparse
-import codecs
-import contextlib
-from glob import glob
-import json
-import os.path
-import shutil
-import sys
-import warnings
-
-# Force iris.tests to use the ```tkagg``` backend by using the '-d'
-# command-line argument as idiff is an interactive tool that requires a
-# gui interface.
-sys.argv.append("-d")
-from PIL import Image  # noqa
-import filelock  # noqa
-import imagehash  # noqa
-import matplotlib.image as mimg  # noqa
-import matplotlib.pyplot as plt  # noqa
-import matplotlib.testing.compare as mcompare  # noqa
-from matplotlib.testing.exceptions import ImageComparisonFailure  # noqa
-import matplotlib.widgets as mwidget  # noqa
-import numpy as np  # noqa
-import requests  # noqa
-
-import iris.tests  # noqa
-import iris.util as iutil  # noqa
-
-_POSTFIX_DIFF = "-failed-diff.png"
-_POSTFIX_JSON = os.path.join("results", "imagerepo.json")
-_POSTFIX_LOCK = os.path.join("results", "imagerepo.lock")
-
-
-@contextlib.contextmanager
-def temp_png(suffix=""):
-    if suffix:
-        suffix = "-{}".format(suffix)
-    fname = iutil.create_temp_filename(suffix + ".png")
-    try:
-        yield fname
-    finally:
-        os.remove(fname)
-
-
-def diff_viewer(
-    repo,
-    key,
-    repo_fname,
-    phash,
-    status,
-    expected_fname,
-    result_fname,
-    diff_fname,
-):
-    fig = plt.figure(figsize=(14, 12))
-    plt.suptitle(os.path.basename(expected_fname))
-    ax = plt.subplot(221)
-    ax.imshow(mimg.imread(expected_fname))
-    ax = plt.subplot(222, sharex=ax, sharey=ax)
-    ax.imshow(mimg.imread(result_fname))
-    ax = plt.subplot(223, sharex=ax, sharey=ax)
-    ax.imshow(mimg.imread(diff_fname))
-
-    result_dir = os.path.dirname(result_fname)
-    fname = "{}.png".format(phash)
-    base_uri = "https://scitools.github.io/test-iris-imagehash/images/v4/{}"
-    uri = base_uri.format(fname)
-    phash_fname = os.path.join(result_dir, fname)
-
-    def accept(event):
-        if uri not in repo[key]:
-            # Ensure to maintain strict time order where the first uri
-            # associated with the repo key is the oldest, and the last
-            # uri is the youngest
-            repo[key].append(uri)
-            # Update the image repo.
-            with open(repo_fname, "wb") as fo:
-                json.dump(
-                    repo,
-                    codecs.getwriter("utf-8")(fo),
-                    indent=4,
-                    sort_keys=True,
-                )
-            os.rename(result_fname, phash_fname)
-            msg = "ACCEPTED:  {} -> {}"
-            print(
-                msg.format(
-                    os.path.basename(result_fname),
-                    os.path.basename(phash_fname),
-                )
-            )
-        else:
-            msg = "DUPLICATE: {} -> {} (ignored)"
-            print(
-                msg.format(
-                    os.path.basename(result_fname),
-                    os.path.basename(phash_fname),
-                )
-            )
-            os.remove(result_fname)
-        os.remove(diff_fname)
-        plt.close()
-
-    def reject(event):
-        if uri not in repo[key]:
-            print("REJECTED:  {}".format(os.path.basename(result_fname)))
-        else:
-            msg = "DUPLICATE: {} -> {} (ignored)"
-            print(
-                msg.format(
-                    os.path.basename(result_fname),
-                    os.path.basename(phash_fname),
-                )
-            )
-        os.remove(result_fname)
-        os.remove(diff_fname)
-        plt.close()
-
-    def skip(event):
-        # Let's keep both the result and the diff files.
-        print("SKIPPED:   {}".format(os.path.basename(result_fname)))
-        plt.close()
-
-    ax_accept = plt.axes([0.59, 0.05, 0.1, 0.075])
-    ax_reject = plt.axes([0.7, 0.05, 0.1, 0.075])
-    ax_skip = plt.axes([0.81, 0.05, 0.1, 0.075])
-    baccept = mwidget.Button(ax_accept, "Accept")
-    baccept.on_clicked(accept)
-    breject = mwidget.Button(ax_reject, "Reject")
-    breject.on_clicked(reject)
-    bskip = mwidget.Button(ax_skip, "Skip")
-    bskip.on_clicked(skip)
-    plt.text(0.59, 0.15, status, transform=fig.transFigure)
-    plt.show()
-
-
-def _calculate_hit(uris, phash, action):
-    # Extract the hex basename strings from the uris.
-    hexes = [os.path.splitext(os.path.basename(uri))[0] for uri in uris]
-    # Create the expected perceptual image hashes from the uris.
-    to_hash = imagehash.hex_to_hash
-    expected = [to_hash(uri_hex) for uri_hex in hexes]
-    # Calculate the hamming distance vector for the result hash.
-    distances = [e - phash for e in expected]
-
-    if action == "first":
-        index = 0
-    elif action == "last":
-        index = -1
-    elif action == "similar":
-        index = np.argmin(distances)
-    elif action == "different":
-        index = np.argmax(distances)
-    else:
-        emsg = "Unknown action: {!r}"
-        raise ValueError(emsg.format(action))
-
-    return index, distances[index]
-
-
-def step_over_diffs(result_dir, action, display=True):
-    processed = False
-    dname = os.path.dirname(iris.tests.__file__)
-    lock = filelock.FileLock(os.path.join(dname, _POSTFIX_LOCK))
-    if action in ["first", "last"]:
-        kind = action
-    elif action in ["similar", "different"]:
-        kind = "most {}".format(action)
-    else:
-        emsg = "Unknown action: {!r}"
-        raise ValueError(emsg.format(action))
-    if display:
-        msg = (
-            "\nComparing the {!r} expected image with "
-            "the test result image."
-        )
-        print(msg.format(kind))
-
-    # Remove old image diff results.
-    target = os.path.join(result_dir, "*{}".format(_POSTFIX_DIFF))
-    for fname in glob(target):
-        os.remove(fname)
-
-    with lock.acquire(timeout=30):
-        # Load the imagerepo.
-        repo_fname = os.path.join(dname, _POSTFIX_JSON)
-        with open(repo_fname, "rb") as fi:
-            repo = json.load(codecs.getreader("utf-8")(fi))
-
-        # Filter out all non-test result image files.
-        target_glob = os.path.join(result_dir, "result-*.png")
-        results = []
-        for fname in sorted(glob(target_glob)):
-            # We only care about PNG images.
-            try:
-                im = Image.open(fname)
-                if im.format != "PNG":
-                    # Ignore - it's not a png image.
-                    continue
-            except IOError:
-                # Ignore - it's not an image.
-                continue
-            results.append(fname)
-
-        count = len(results)
-
-        for count_index, result_fname in enumerate(results):
-            key = os.path.splitext(
-                "-".join(result_fname.split("result-")[1:])
-            )[0]
-            try:
-                # Calculate the test result perceptual image hash.
-                phash = imagehash.phash(
-                    Image.open(result_fname), hash_size=iris.tests._HASH_SIZE
-                )
-                uris = repo[key]
-                hash_index, distance = _calculate_hit(uris, phash, action)
-                uri = uris[hash_index]
-            except KeyError:
-                wmsg = "Ignoring unregistered test result {!r}."
-                warnings.warn(wmsg.format(key))
-                continue
-            with temp_png(key) as expected_fname:
-                processed = True
-                resource = requests.get(uri)
-                if resource.status_code == 200:
-                    with open(expected_fname, "wb") as fo:
-                        fo.write(resource.content)
-                else:
-                    # Perhaps the uri has not been pushed into the repo yet,
-                    # so check if a local "developer" copy is available ...
-                    local_fname = os.path.join(
-                        result_dir, os.path.basename(uri)
-                    )
-                    if not os.path.isfile(local_fname):
-                        emsg = "Bad URI {!r} for test {!r}."
-                        raise ValueError(emsg.format(uri, key))
-                    else:
-                        # The temporary expected filename has the test name
-                        # baked into it, and is used in the diff plot title.
-                        # So copy the local file to the exected file to
-                        # maintain this helpfulness.
-                        shutil.copy(local_fname, expected_fname)
-                try:
-                    mcompare.compare_images(
-                        expected_fname, result_fname, tol=0
-                    )
-                except Exception as e:
-                    if isinstance(e, ValueError) or isinstance(
-                        e, ImageComparisonFailure
-                    ):
-                        print(
-                            "Could not compare {}: {}".format(result_fname, e)
-                        )
-                        continue
-                    else:
-                        # Propagate the exception, keeping the stack trace
-                        raise
-                diff_fname = os.path.splitext(result_fname)[0] + _POSTFIX_DIFF
-                args = expected_fname, result_fname, diff_fname
-                if display:
-                    msg = "Image {} of {}: hamming distance = {} " "[{!r}]"
-                    status = msg.format(count_index + 1, count, distance, kind)
-                    prefix = repo, key, repo_fname, phash, status
-                    yield prefix + args
-                else:
-                    yield args
-        if display and not processed:
-            print("\nThere are no iris test result images to process.\n")
-
-
-if __name__ == "__main__":
-    default = os.path.join(
-        os.path.dirname(iris.tests.__file__), "result_image_comparison"
-    )
-    description = "Iris graphic test difference tool."
-    formatter_class = argparse.RawTextHelpFormatter
-    parser = argparse.ArgumentParser(
-        description=description, formatter_class=formatter_class
-    )
-    help = "path to iris tests result image directory (default: %(default)s)"
-    parser.add_argument("--resultdir", "-r", default=default, help=help)
-    help = 'force "iris.tests" to use the tkagg backend (default: %(default)s)'
-    parser.add_argument("-d", action="store_true", default=True, help=help)
-    help = """
-first     - compare result image with first (oldest) expected image
-last      - compare result image with last (youngest) expected image
-similar   - compare result image with most similar expected image (default)
-different - compare result image with most unsimilar expected image
-"""
-    choices = ("first", "last", "similar", "different")
-    parser.add_argument(
-        "action", nargs="?", choices=choices, default="similar", help=help
-    )
-    args = parser.parse_args()
-    result_dir = args.resultdir
-    if not os.path.isdir(result_dir):
-        emsg = "Invalid results directory: {}"
-        raise ValueError(emsg.format(result_dir))
-    for args in step_over_diffs(result_dir, args.action):
-        diff_viewer(*args)
diff --git a/requirements/ci/py38.yml b/requirements/ci/py38.yml
index ef095815c9..d552861f31 100644
--- a/requirements/ci/py38.yml
+++ b/requirements/ci/py38.yml
@@ -34,7 +34,7 @@ dependencies:
   - filelock
   - imagehash >=4.0
   - nose
-  - pillow <7
+  - pillow
   - pre-commit
   - requests