From 73fd93f898966b266c47ac4f23283fd824c2cf34 Mon Sep 17 00:00:00 2001
From: Stephen Rosen
Date: Mon, 25 Nov 2024 10:48:57 -0600
Subject: [PATCH 1/2] Internal refactor: no default cache dirname

The two cache dirs are currently "refs" and "downloads". Make these always
explicit, rather than `"downloads"` being an internal default baked into the
downloader.
---
 src/check_jsonschema/cachedownloader.py        | 11 +++--------
 src/check_jsonschema/schema_loader/readers.py  |  2 +-
 src/check_jsonschema/schema_loader/resolver.py |  2 +-
 tests/unit/test_cachedownloader.py             | 17 +++++++++--------
 4 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/src/check_jsonschema/cachedownloader.py b/src/check_jsonschema/cachedownloader.py
index 6b9865126..40f1d2cbb 100644
--- a/src/check_jsonschema/cachedownloader.py
+++ b/src/check_jsonschema/cachedownloader.py
@@ -33,7 +33,7 @@ def _base_cache_dir() -> str | None:
     return cache_dir
 
 
-def _resolve_cache_dir(dirname: str = "downloads") -> str | None:
+def _resolve_cache_dir(dirname: str) -> str | None:
     cache_dir = _base_cache_dir()
     if cache_dir:
         cache_dir = os.path.join(cache_dir, "check_jsonschema", dirname)
@@ -100,13 +100,8 @@ class FailedDownloadError(Exception):
 
 
 class CacheDownloader:
-    def __init__(
-        self, cache_dir: str | None = None, disable_cache: bool = False
-    ) -> None:
-        if cache_dir is None:
-            self._cache_dir = _resolve_cache_dir()
-        else:
-            self._cache_dir = _resolve_cache_dir(cache_dir)
+    def __init__(self, cache_dir: str, *, disable_cache: bool = False) -> None:
+        self._cache_dir = _resolve_cache_dir(cache_dir)
         self._disable_cache = disable_cache
 
     def _download(
diff --git a/src/check_jsonschema/schema_loader/readers.py b/src/check_jsonschema/schema_loader/readers.py
index 65280808b..f496d42eb 100644
--- a/src/check_jsonschema/schema_loader/readers.py
+++ b/src/check_jsonschema/schema_loader/readers.py
@@ -79,7 +79,7 @@ def __init__(
         self.url = url
         self.parsers = ParserSet()
         self.downloader = CacheDownloader(
-            disable_cache=disable_cache,
+            "downloads", disable_cache=disable_cache
         ).bind(url, cache_filename, validation_callback=self._parse)
         self._parsed_schema: dict | _UnsetType = _UNSET
 
diff --git a/src/check_jsonschema/schema_loader/resolver.py b/src/check_jsonschema/schema_loader/resolver.py
index 5084328a5..a82ae31c9 100644
--- a/src/check_jsonschema/schema_loader/resolver.py
+++ b/src/check_jsonschema/schema_loader/resolver.py
@@ -66,7 +66,7 @@ def create_retrieve_callable(
         base_uri = retrieval_uri
 
     cache = ResourceCache()
-    downloader = CacheDownloader("refs", disable_cache)
+    downloader = CacheDownloader("refs", disable_cache=disable_cache)
 
     def get_local_file(uri: str) -> t.Any:
         path = filename2path(uri)
diff --git a/tests/unit/test_cachedownloader.py b/tests/unit/test_cachedownloader.py
index 30d47052b..99382e921 100644
--- a/tests/unit/test_cachedownloader.py
+++ b/tests/unit/test_cachedownloader.py
@@ -30,7 +30,7 @@ def default_response():
 
 def test_default_filename_from_uri(default_response):
-    cd = CacheDownloader().bind("https://example.com/schema1.json")
+    cd = CacheDownloader("downloads").bind("https://example.com/schema1.json")
     assert cd._filename == "schema1.json"
 
 
 @pytest.mark.parametrize(
@@ -76,7 +76,7 @@ def fake_expanduser(path):
     monkeypatch.setattr(platform, "system", fakesystem)
     monkeypatch.setattr(os.path, "expanduser", fake_expanduser)
 
-    cd = CacheDownloader()
+    cd = CacheDownloader("downloads")
     assert cd._cache_dir == expect_value
 
     if sysname == "Darwin":
@@ -114,7 +114,7 @@ def test_cachedownloader_cached_file(tmp_path, monkeypatch, default_response):
     f.write_text("{}")
 
     # set the cache_dir to the tmp dir (so that cache_dir will always be set)
-    cd = CacheDownloader(cache_dir=tmp_path).bind(str(f))
+    cd = CacheDownloader(tmp_path).bind(str(f))
     # patch the downloader to skip any download "work"
     monkeypatch.setattr(
         cd._downloader, "_download", lambda file_uri, filename, response_ok: str(f)
@@ -128,7 +128,7 @@ def test_cachedownloader_cached_file(tmp_path, monkeypatch, default_response):
 def test_cachedownloader_on_success(get_download_cache_loc, disable_cache):
     add_default_response()
     f = get_download_cache_loc("schema1.json")
-    cd = CacheDownloader(disable_cache=disable_cache).bind(
+    cd = CacheDownloader("downloads", disable_cache=disable_cache).bind(
         "https://example.com/schema1.json"
     )
@@ -171,7 +171,7 @@ def test_cachedownloader_succeeds_after_few_errors(
     )
     add_default_response()
     f = get_download_cache_loc("schema1.json")
-    cd = CacheDownloader(disable_cache=disable_cache).bind(
+    cd = CacheDownloader("downloads", disable_cache=disable_cache).bind(
         "https://example.com/schema1.json"
     )
@@ -205,7 +205,7 @@ def test_cachedownloader_fails_after_many_errors(
     )
     add_default_response()  # never reached, the 11th response
     f = get_download_cache_loc("schema1.json")
-    cd = CacheDownloader(disable_cache=disable_cache).bind(
+    cd = CacheDownloader("downloads", disable_cache=disable_cache).bind(
         "https://example.com/schema1.json"
     )
     with pytest.raises(FailedDownloadError):
@@ -226,6 +226,7 @@ def test_cachedownloader_retries_on_bad_data(get_download_cache_loc, disable_cac
     add_default_response()
     f = get_download_cache_loc("schema1.json")
     cd = CacheDownloader(
+        "downloads",
         disable_cache=disable_cache,
     ).bind(
         "https://example.com/schema1.json",
@@ -281,7 +282,7 @@ def fake_mktime(*args):
     if file_exists:
         inject_cached_download(uri, original_file_contents)
 
-    cd = CacheDownloader().bind(uri)
+    cd = CacheDownloader("downloads").bind(uri)
 
     # if the file already existed, it will not be overwritten by the cachedownloader
     # so the returned value for both the downloader and a direct file read should be the
@@ -326,7 +327,7 @@ def dummy_validate_bytes(data):
 
     # construct a downloader pointed at the schema and file, expecting a cache hit
     # and use the above validation method
-    cd = CacheDownloader().bind(
+    cd = CacheDownloader("downloads").bind(
         "https://example.com/schema1.json",
         validation_callback=dummy_validate_bytes,
     )

From 6e943027eb1d941132ca6efb2eb85136f4176461 Mon Sep 17 00:00:00 2001
From: Stephen Rosen
Date: Fri, 29 Nov 2024 07:54:47 -0600
Subject: [PATCH 2/2] Caching URLs always uses hashes

1. When caching downloaded schemas, hash their URLs to generate the cache
   filenames, just as is done in the `refs/` cache. This fixes bugs caused by
   cache collisions when two distinct URLs end in the same filename. The
   schema download cache also moves from `downloads/` to `schemas/`.

2. Deprecate (but do not, at this time, remove) the `--cache-filename`
   option, making it non-functional. This also removes the `cache_filename`
   internal arg from several components.

Minimal test and doc cleanup is included to handle these changes.
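
For illustration, a minimal sketch of the new naming behavior (the URLs are
illustrative and the arrow comments are placeholders, not real digests):

    from check_jsonschema.cachedownloader import url_to_cache_filename

    # the extension, if any, is preserved after the hash, so extension-based
    # parsing keeps working and cached files stay recognizable
    url_to_cache_filename("https://example.com/schema1.json")
    # -> "<sha256 hexdigest of the URL>.json"

    # a URL whose last path segment has no extension gets a bare hash
    url_to_cache_filename("https://example.com/schemas/v1")
    # -> "<sha256 hexdigest of the URL>"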
---
 docs/usage.rst                                |  7 +-
 src/check_jsonschema/cachedownloader.py       | 26 +++++-
 src/check_jsonschema/cli/main_command.py      |  9 +--
 src/check_jsonschema/schema_loader/main.py    |  8 +-
 src/check_jsonschema/schema_loader/readers.py |  7 +-
 .../schema_loader/resolver.py                 | 20 +----
 tests/conftest.py                             | 24 ++++--
 tests/unit/test_cachedownloader.py            | 80 ++++++++++---------
 8 files changed, 91 insertions(+), 90 deletions(-)

diff --git a/docs/usage.rst b/docs/usage.rst
index 7c8d79c4d..56a5bb12a 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -118,6 +118,10 @@ Downloading and Caching
 By default, when ``--schemafile`` is used to refer to an ``http://`` or
 ``https://`` location, the schema is downloaded and cached based on the
 schema's Last-Modified time.
+
+Additionally, when ``$ref``\s are looked up during schema resolution, they are
+similarly cached.
+
 The following options control caching behaviors.
 
 .. list-table:: Caching Options
@@ -128,9 +132,6 @@ The following options control caching behaviors.
     - Description
   * - ``--no-cache``
     - Disable caching.
-  * - ``--cache-filename``
-    - The name to use for caching a remote schema.
-      Defaults to using the last slash-delimited part of the URI.
 
 "format" Validation Options
 ---------------------------
diff --git a/src/check_jsonschema/cachedownloader.py b/src/check_jsonschema/cachedownloader.py
index 40f1d2cbb..41cf2aae3 100644
--- a/src/check_jsonschema/cachedownloader.py
+++ b/src/check_jsonschema/cachedownloader.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import contextlib
+import hashlib
 import io
 import os
 import platform
@@ -95,6 +96,25 @@ def _cache_hit(cachefile: str, response: requests.Response) -> bool:
     return local_mtime >= remote_mtime
 
 
+def url_to_cache_filename(ref_url: str) -> str:
+    """
+    Given a schema URL, convert it to a filename for caching in a cache dir.
+
+    Rules are as follows:
+    - the base filename is a sha256 hash of the URL
+    - if the filename ends in an extension (.json, .yaml, etc) that extension
+      is appended to the hash
+
+    Preserving the file extension keeps the extension-based parsing logic
+    working, and it also helps anyone browsing the cache identify filetypes.
+    """
+    filename = hashlib.sha256(ref_url.encode()).hexdigest()
+    if "." in (last_part := ref_url.rpartition("/")[-1]):
+        _, _, extension = last_part.rpartition(".")
+        filename = f"{filename}.{extension}"
+    return filename
+
+
 class FailedDownloadError(Exception):
     pass
 
@@ -155,7 +175,7 @@ def bind(
         validation_callback: t.Callable[[bytes], t.Any] | None = None,
     ) -> BoundCacheDownloader:
         return BoundCacheDownloader(
-            file_url, filename, self, validation_callback=validation_callback
+            file_url, self, filename=filename, validation_callback=validation_callback
         )
 
 
@@ -163,13 +183,13 @@ class BoundCacheDownloader:
     def __init__(
         self,
         file_url: str,
-        filename: str | None,
         downloader: CacheDownloader,
         *,
+        filename: str | None = None,
         validation_callback: t.Callable[[bytes], t.Any] | None = None,
     ) -> None:
         self._file_url = file_url
-        self._filename = filename or file_url.split("/")[-1]
+        self._filename = filename or url_to_cache_filename(file_url)
         self._downloader = downloader
         self._validation_callback = validation_callback
 
diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py
index 3145019e8..3a7aae643 100644
--- a/src/check_jsonschema/cli/main_command.py
+++ b/src/check_jsonschema/cli/main_command.py
@@ -130,11 +130,7 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
     help="Disable schema caching. Always download remote schemas.",
 )
 @click.option(
-    "--cache-filename",
-    help=(
-        "The name to use for caching a remote schema. "
-        "Defaults to the last slash-delimited part of the URI."
-    ),
+    "--cache-filename", help="Deprecated. This option no longer has any effect."
 )
 @click.option(
     "--disable-formats",
@@ -271,8 +267,6 @@ def main(
     args.disable_cache = no_cache
     args.default_filetype = default_filetype
     args.fill_defaults = fill_defaults
-    if cache_filename is not None:
-        args.cache_filename = cache_filename
     if data_transform is not None:
         args.data_transform = TRANSFORM_LIBRARY[data_transform]
 
@@ -300,7 +294,6 @@ def build_schema_loader(args: ParseResult) -> SchemaLoaderBase:
     assert args.schema_path is not None
     return SchemaLoader(
         args.schema_path,
-        cache_filename=args.cache_filename,
         disable_cache=args.disable_cache,
         base_uri=args.base_uri,
         validator_class=args.validator_class,
diff --git a/src/check_jsonschema/schema_loader/main.py b/src/check_jsonschema/schema_loader/main.py
index 88ff6a0f2..099107455 100644
--- a/src/check_jsonschema/schema_loader/main.py
+++ b/src/check_jsonschema/schema_loader/main.py
@@ -64,14 +64,12 @@ def __init__(
         self,
         schemafile: str,
         *,
-        cache_filename: str | None = None,
         base_uri: str | None = None,
         validator_class: type[jsonschema.protocols.Validator] | None = None,
         disable_cache: bool = True,
     ) -> None:
         # record input parameters (these are not to be modified)
         self.schemafile = schemafile
-        self.cache_filename = cache_filename
         self.disable_cache = disable_cache
         self.base_uri = base_uri
         self.validator_class = validator_class
@@ -105,11 +103,7 @@ def _get_schema_reader(
             return LocalSchemaReader(self.schemafile)
 
         if self.url_info.scheme in ("http", "https"):
-            return HttpSchemaReader(
-                self.schemafile,
-                self.cache_filename,
-                self.disable_cache,
-            )
+            return HttpSchemaReader(self.schemafile, self.disable_cache)
         else:
             raise UnsupportedUrlScheme(
                 "check-jsonschema only supports http, https, and local files. "
" diff --git a/src/check_jsonschema/schema_loader/readers.py b/src/check_jsonschema/schema_loader/readers.py index f496d42eb..61299350a 100644 --- a/src/check_jsonschema/schema_loader/readers.py +++ b/src/check_jsonschema/schema_loader/readers.py @@ -73,14 +73,13 @@ class HttpSchemaReader: def __init__( self, url: str, - cache_filename: str | None, disable_cache: bool, ) -> None: self.url = url self.parsers = ParserSet() - self.downloader = CacheDownloader( - "downloads", disable_cache=disable_cache - ).bind(url, cache_filename, validation_callback=self._parse) + self.downloader = CacheDownloader("schemas", disable_cache=disable_cache).bind( + url, validation_callback=self._parse + ) self._parsed_schema: dict | _UnsetType = _UNSET def _parse(self, schema_bytes: bytes) -> t.Any: diff --git a/src/check_jsonschema/schema_loader/resolver.py b/src/check_jsonschema/schema_loader/resolver.py index a82ae31c9..15344d6bd 100644 --- a/src/check_jsonschema/schema_loader/resolver.py +++ b/src/check_jsonschema/schema_loader/resolver.py @@ -1,6 +1,5 @@ from __future__ import annotations -import hashlib import typing as t import urllib.parse @@ -12,21 +11,6 @@ from ..utils import filename2path -def ref_url_to_cache_filename(ref_url: str) -> str: - """ - Given a $ref URL, convert it to the filename in the refs/ cache dir. - Rules are as follows: - - the base filename is an md5 hash of the URL - - if the filename ends in an extension (.json, .yaml, etc) that extension - is appended to the hash - """ - filename = hashlib.md5(ref_url.encode()).hexdigest() - if "." in (last_part := ref_url.rpartition("/")[-1]): - _, _, extension = last_part.rpartition(".") - filename = f"{filename}.{extension}" - return filename - - def make_reference_registry( parsers: ParserSet, retrieval_uri: str | None, schema: dict, disable_cache: bool ) -> referencing.Registry: @@ -89,9 +73,7 @@ def validation_callback(content: bytes) -> None: parser_set.parse_data_with_path(content, full_uri, "json") bound_downloader = downloader.bind( - full_uri, - ref_url_to_cache_filename(full_uri), - validation_callback, + full_uri, validation_callback=validation_callback ) with bound_downloader.open() as fp: data = fp.read() diff --git a/tests/conftest.py b/tests/conftest.py index 9179f6eed..b0022a8d3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -62,15 +62,25 @@ def patch_cache_dir(monkeypatch, cache_dir): yield m +@pytest.fixture +def url2cachepath(): + from check_jsonschema.cachedownloader import url_to_cache_filename + + def _get(cache_dir, url): + return cache_dir / url_to_cache_filename(url) + + return _get + + @pytest.fixture def downloads_cache_dir(tmp_path): return tmp_path / ".cache" / "check_jsonschema" / "downloads" @pytest.fixture -def get_download_cache_loc(downloads_cache_dir): - def _get(uri): - return downloads_cache_dir / uri.split("/")[-1] +def get_download_cache_loc(downloads_cache_dir, url2cachepath): + def _get(url): + return url2cachepath(downloads_cache_dir, url) return _get @@ -94,11 +104,9 @@ def refs_cache_dir(tmp_path): @pytest.fixture -def get_ref_cache_loc(refs_cache_dir): - from check_jsonschema.schema_loader.resolver import ref_url_to_cache_filename - - def _get(uri): - return refs_cache_dir / ref_url_to_cache_filename(uri) +def get_ref_cache_loc(refs_cache_dir, url2cachepath): + def _get(url): + return url2cachepath(refs_cache_dir, url) return _get diff --git a/tests/unit/test_cachedownloader.py b/tests/unit/test_cachedownloader.py index 99382e921..1a0949aed 100644 --- 
+++ b/tests/unit/test_cachedownloader.py
@@ -11,13 +11,16 @@
     CacheDownloader,
     FailedDownloadError,
     _cache_hit,
+    url_to_cache_filename,
 )
 
+DEFAULT_RESPONSE_URL = "https://example.com/schema1.json"
+
 
 def add_default_response():
     responses.add(
         "GET",
-        "https://example.com/schema1.json",
+        DEFAULT_RESPONSE_URL,
         headers={"Last-Modified": "Sun, 01 Jan 2000 00:00:01 GMT"},
         json={},
         match_querystring=None,
@@ -30,8 +33,8 @@ def default_response():
 
 
 def test_default_filename_from_uri(default_response):
-    cd = CacheDownloader("downloads").bind("https://example.com/schema1.json")
-    assert cd._filename == "schema1.json"
+    cd = CacheDownloader("downloads").bind(DEFAULT_RESPONSE_URL)
+    assert cd._filename == url_to_cache_filename(DEFAULT_RESPONSE_URL)
 
 
 @pytest.mark.parametrize(
@@ -94,7 +97,7 @@ def test_cache_hit_by_mtime(monkeypatch, default_response):
     monkeypatch.setattr(os.path, "getmtime", lambda x: time.time())
     assert _cache_hit(
         "/tmp/schema1.json",
-        requests.get("https://example.com/schema1.json", stream=True),
+        requests.get(DEFAULT_RESPONSE_URL, stream=True),
     )
 
     # local mtime = 0, cache miss
@@ -102,7 +105,7 @@ def test_cache_hit_by_mtime(monkeypatch, default_response):
     assert (
         _cache_hit(
             "/tmp/schema1.json",
-            requests.get("https://example.com/schema1.json", stream=True),
+            requests.get(DEFAULT_RESPONSE_URL, stream=True),
         )
         is False
     )
@@ -114,7 +117,7 @@ def test_cachedownloader_cached_file(tmp_path, monkeypatch, default_response):
     f.write_text("{}")
 
     # set the cache_dir to the tmp dir (so that cache_dir will always be set)
-    cd = CacheDownloader(tmp_path).bind(str(f))
+    cd = CacheDownloader(tmp_path).bind(str(f), filename="foo.json")
     # patch the downloader to skip any download "work"
     monkeypatch.setattr(
         cd._downloader, "_download", lambda file_uri, filename, response_ok: str(f)
@@ -125,11 +128,12 @@ def test_cachedownloader_cached_file(tmp_path, monkeypatch, default_response):
 
 
 @pytest.mark.parametrize("disable_cache", (True, False))
-def test_cachedownloader_on_success(get_download_cache_loc, disable_cache):
-    add_default_response()
-    f = get_download_cache_loc("schema1.json")
+def test_cachedownloader_on_success(
+    get_download_cache_loc, disable_cache, default_response
+):
+    f = get_download_cache_loc(DEFAULT_RESPONSE_URL)
     cd = CacheDownloader("downloads", disable_cache=disable_cache).bind(
-        "https://example.com/schema1.json"
+        DEFAULT_RESPONSE_URL
     )
 
     with cd.open() as fp:
@@ -140,10 +144,12 @@ def test_cachedownloader_on_success(get_download_cache_loc, disable_cache):
     assert f.exists()
 
 
-def test_cachedownloader_using_alternate_target_dir(cache_dir):
-    add_default_response()
-    f = cache_dir / "check_jsonschema" / "otherdir" / "schema1.json"
-    cd = CacheDownloader("otherdir").bind("https://example.com/schema1.json")
+def test_cachedownloader_using_alternate_target_dir(
+    cache_dir, default_response, url2cachepath
+):
+    cache_dir = cache_dir / "check_jsonschema" / "otherdir"
+    f = url2cachepath(cache_dir, DEFAULT_RESPONSE_URL)
+    cd = CacheDownloader("otherdir").bind(DEFAULT_RESPONSE_URL)
     with cd.open() as fp:
         assert fp.read() == b"{}"
     assert f.exists()
@@ -158,21 +164,21 @@ def test_cachedownloader_succeeds_after_few_errors(
         for _i in range(failures):
             responses.add(
                 "GET",
-                "https://example.com/schema1.json",
+                DEFAULT_RESPONSE_URL,
                 status=500,
                 match_querystring=None,
             )
     else:
         responses.add(
             "GET",
-            "https://example.com/schema1.json",
+            DEFAULT_RESPONSE_URL,
             body=failures(),
             match_querystring=None,
         )
     add_default_response()
-    f = get_download_cache_loc("schema1.json")
get_download_cache_loc("schema1.json") + f = get_download_cache_loc(DEFAULT_RESPONSE_URL) cd = CacheDownloader("downloads", disable_cache=disable_cache).bind( - "https://example.com/schema1.json" + DEFAULT_RESPONSE_URL ) with cd.open() as fp: @@ -192,21 +198,21 @@ def test_cachedownloader_fails_after_many_errors( if connection_error: responses.add( "GET", - "https://example.com/schema1.json", + DEFAULT_RESPONSE_URL, body=requests.ConnectionError(), match_querystring=None, ) else: responses.add( "GET", - "https://example.com/schema1.json", + DEFAULT_RESPONSE_URL, status=500, match_querystring=None, ) add_default_response() # never reached, the 11th response - f = get_download_cache_loc("schema1.json") + f = get_download_cache_loc(DEFAULT_RESPONSE_URL) cd = CacheDownloader("downloads", disable_cache=disable_cache).bind( - "https://example.com/schema1.json" + DEFAULT_RESPONSE_URL ) with pytest.raises(FailedDownloadError): with cd.open(): @@ -218,19 +224,18 @@ def test_cachedownloader_fails_after_many_errors( def test_cachedownloader_retries_on_bad_data(get_download_cache_loc, disable_cache): responses.add( "GET", - "https://example.com/schema1.json", + DEFAULT_RESPONSE_URL, status=200, body="{", match_querystring=None, ) add_default_response() - f = get_download_cache_loc("schema1.json") + f = get_download_cache_loc(DEFAULT_RESPONSE_URL) cd = CacheDownloader( "downloads", disable_cache=disable_cache, ).bind( - "https://example.com/schema1.json", - filename=str(f), + DEFAULT_RESPONSE_URL, validation_callback=json.loads, ) @@ -254,13 +259,14 @@ def test_cachedownloader_handles_bad_lastmod_header( file_exists, failure_mode, ): - uri = "https://example.com/schema1.json" if failure_mode == "header_missing": - responses.add("GET", uri, headers={}, json={}, match_querystring=None) + responses.add( + "GET", DEFAULT_RESPONSE_URL, headers={}, json={}, match_querystring=None + ) elif failure_mode == "header_malformed": responses.add( "GET", - uri, + DEFAULT_RESPONSE_URL, headers={"Last-Modified": "Jan 2000 00:00:01"}, json={}, match_querystring=None, @@ -276,13 +282,13 @@ def fake_mktime(*args): raise NotImplementedError original_file_contents = b'{"foo": "bar"}' - file_path = get_download_cache_loc(uri) + file_path = get_download_cache_loc(DEFAULT_RESPONSE_URL) assert not file_path.exists() if file_exists: - inject_cached_download(uri, original_file_contents) + inject_cached_download(DEFAULT_RESPONSE_URL, original_file_contents) - cd = CacheDownloader("downloads").bind(uri) + cd = CacheDownloader("downloads").bind(DEFAULT_RESPONSE_URL) # if the file already existed, it will not be overwritten by the cachedownloader # so the returned value for both the downloader and a direct file read should be the @@ -303,7 +309,7 @@ def fake_mktime(*args): def test_cachedownloader_validation_is_not_invoked_on_hit( - monkeypatch, inject_cached_download + monkeypatch, default_response, inject_cached_download ): """ Regression test for https://github.com/python-jsonschema/check-jsonschema/issues/453 @@ -311,12 +317,10 @@ def test_cachedownloader_validation_is_not_invoked_on_hit( This was a bug in which the validation callback was invoked eagerly, even on a cache hit. As a result, cache hits did not demonstrate their expected performance gain. 
""" - uri = "https://example.com/schema1.json" - # 1: construct some perfectly good data (it doesn't really matter what it is) - add_default_response() + # <> # 2: put equivalent data on disk - inject_cached_download(uri, "{}") + inject_cached_download(DEFAULT_RESPONSE_URL, "{}") # 3: construct a validator which marks that it ran in a variable validator_ran = False @@ -328,7 +332,7 @@ def dummy_validate_bytes(data): # construct a downloader pointed at the schema and file, expecting a cache hit # and use the above validation method cd = CacheDownloader("downloads").bind( - "https://example.com/schema1.json", + DEFAULT_RESPONSE_URL, validation_callback=dummy_validate_bytes, )