diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py index 1d6df220118..1edcc76722b 100644 --- a/src/pip/_internal/cache.py +++ b/src/pip/_internal/cache.py @@ -5,12 +5,14 @@ import json import logging import os +from pathlib import Path from typing import Any, Dict, List, Optional, Set from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version from pip._vendor.packaging.utils import canonicalize_name from pip._internal.exceptions import InvalidWheelFilename +from pip._internal.models.direct_url import DirectUrl from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel @@ -19,6 +21,8 @@ logger = logging.getLogger(__name__) +ORIGIN_JSON_NAME = "origin.json" + def _hash_dict(d: Dict[str, str]) -> str: """Return a stable sha224 of a dictionary.""" @@ -204,6 +208,10 @@ def __init__( ): self.link = link self.persistent = persistent + self.origin: Optional[DirectUrl] = None + origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME + if origin_direct_url_path.exists(): + self.origin = DirectUrl.from_json(origin_direct_url_path.read_text()) class WheelCache(Cache): @@ -262,3 +270,20 @@ def get_cache_entry( return CacheEntry(retval, persistent=False) return None + + @staticmethod + def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None: + origin_path = Path(cache_dir) / ORIGIN_JSON_NAME + if origin_path.is_file(): + origin = DirectUrl.from_json(origin_path.read_text()) + # TODO: use DirectUrl.equivalent when https://github.com/pypa/pip/pull/10564 + # is merged. + if origin.url != download_info.url: + logger.warning( + "Origin URL %s in cache entry %s does not match download URL %s. " + "This is likely a pip bug or a cache corruption issue.", + origin.url, + cache_dir, + download_info.url, + ) + origin_path.write_text(download_info.to_json()) diff --git a/src/pip/_internal/resolution/legacy/resolver.py b/src/pip/_internal/resolution/legacy/resolver.py index 1225ae70fcb..ddfda41279c 100644 --- a/src/pip/_internal/resolution/legacy/resolver.py +++ b/src/pip/_internal/resolution/legacy/resolver.py @@ -431,6 +431,8 @@ def _populate_link(self, req: InstallRequirement) -> None: logger.debug("Using cached wheel link: %s", cache_entry.link) if req.link is req.original_link and cache_entry.persistent: req.original_link_is_in_wheel_cache = True + if cache_entry.origin is not None: + req.download_info = cache_entry.origin req.link = cache_entry.link def _get_dist_for(self, req: InstallRequirement) -> BaseDistribution: diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index d1470ecbf4e..2561e03f0ed 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -287,6 +287,8 @@ def __init__( and template.link is template.original_link ): ireq.original_link_is_in_wheel_cache = True + if cache_entry.origin is not None: + ireq.download_info = cache_entry.origin super().__init__( link=link, diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py index d0663443b22..77a17ff0f15 100644 --- a/src/pip/_internal/wheel_builder.py +++ b/src/pip/_internal/wheel_builder.py @@ -354,6 +354,12 @@ def build( req.editable and req.permit_editable_wheels, ) if wheel_file: + # Record the download origin in the cache + if req.download_info is not None: + # download_info is guaranteed to be set because when we build an + # InstallRequirement it has been through the preparer before, but + # let's be cautious. + wheel_cache.record_download_origin(cache_dir, req.download_info) # Update the link for this. req.link = Link(path_to_url(wheel_file)) req.local_file_path = req.link.file_path