Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use data-dist-info-metadata (PEP 658) to decouple resolution from downloading #11111

Merged
merged 18 commits into from
Sep 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
d0813e8
create LinkHash and check out dist-info-metadata (PEP 658)
cosmicexplorer May 11, 2022
28b6134
add NEWS entry
cosmicexplorer May 11, 2022
f675e09
explain why we don't validate hexdigests now
cosmicexplorer May 12, 2022
6675ba3
fix importlib.metadata case
cosmicexplorer May 13, 2022
80e044a
make it work without --use-feature=fast-deps!
cosmicexplorer May 15, 2022
266c5cd
respond to review comments
cosmicexplorer Sep 5, 2022
8da1bdc
ensure PEP 691 json parsing also supports PEP 658 dist-info-metadata
cosmicexplorer Sep 5, 2022
23e5492
add test case for dist-info-metadata key from PEP 691!!
cosmicexplorer Sep 5, 2022
a685a98
remove unused code after html5lib was removed!!
cosmicexplorer Sep 5, 2022
96bd60e
rename WheelMetadata to InMemoryMetadata as per review comments
cosmicexplorer Sep 5, 2022
2aa1c2f
rename from_metadata_file{,_contents}() to avoid ambiguity pointed ou…
cosmicexplorer Sep 5, 2022
e5b2fcd
add tests for PEP 658 metadata with wheel files too
cosmicexplorer Sep 5, 2022
89f235d
add a note about further testing the json client
cosmicexplorer Sep 5, 2022
fc9bcdd
refactor and rename some variables in a confusing block of terse code
cosmicexplorer Sep 7, 2022
036bfc0
update message for MetadataInconsistent to allow use with PEP 658 met…
cosmicexplorer Sep 7, 2022
8aeb108
raise MetadataInconsistent if the name from METADATA doesn't match th…
cosmicexplorer Sep 7, 2022
931501f
Use pathlib shorthand to write file
uranusjr Sep 7, 2022
f8d6dae
Switch to f-string
uranusjr Sep 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/11111.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata without downloading the dist yet.
11 changes: 5 additions & 6 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,8 @@ class MetadataInconsistent(InstallationError):
"""Built metadata contains inconsistent information.

This is raised when the metadata contains values (e.g. name and version)
that do not match the information previously obtained from sdist filename
or user-supplied ``#egg=`` value.
that do not match the information previously obtained from sdist filename,
user-supplied ``#egg=`` value, or an install requirement name.
"""

def __init__(
Expand All @@ -348,11 +348,10 @@ def __init__(
self.m_val = m_val

def __str__(self) -> str:
template = (
"Requested {} has inconsistent {}: "
"filename has {!r}, but metadata has {!r}"
return (
f"Requested {self.ireq} has inconsistent {self.field}: "
f"expected {self.f_val!r}, but metadata has {self.m_val!r}"
)
return template.format(self.ireq, self.field, self.f_val, self.m_val)


class LegacyInstallFailure(DiagnosticPipError):
Expand Down
120 changes: 5 additions & 115 deletions src/pip/_internal/index/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@
import json
import logging
import os
import re
import urllib.parse
import urllib.request
import xml.etree.ElementTree
from html.parser import HTMLParser
from optparse import Values
from typing import (
Expand All @@ -39,7 +37,7 @@
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.filetypes import is_archive_file
from pip._internal.utils.misc import pairwise, redact_auth_from_url
from pip._internal.utils.misc import redact_auth_from_url
from pip._internal.vcs import vcs

from .sources import CandidatesFromPage, LinkSource, build_source
Expand All @@ -51,7 +49,6 @@

logger = logging.getLogger(__name__)

HTMLElement = xml.etree.ElementTree.Element
ResponseHeaders = MutableMapping[str, str]


Expand Down Expand Up @@ -191,94 +188,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
return None


def _clean_url_path_part(part: str) -> str:
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
return urllib.parse.quote(urllib.parse.unquote(part))


def _clean_file_url_path(part: str) -> str:
"""
Clean the first part of a URL path that corresponds to a local
filesystem path (i.e. the first part after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
# Also, on Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
return urllib.request.pathname2url(urllib.request.url2pathname(part))


# Matches the separators on which a URL path is split before cleaning:
# a literal "@", or "%2F" (the percent-encoded form of "/") in any letter case.
# The capturing group makes re.split() keep the matched separators in its output.
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)


def _clean_url_path(path: str, is_local_path: bool) -> str:
    """Clean the path portion of a URL.

    :param path: The path component of a URL.
    :param is_local_path: Whether the path refers to a local filesystem
        location; selects which per-part cleaning function is applied.
    :return: The path with every part cleaned and every reserved separator
        upper-cased.
    """
    clean_func = _clean_file_url_path if is_local_path else _clean_url_path_part

    # Split on the reserved characters before cleaning so that revision
    # strings in VCS URLs are properly preserved. Because the pattern has a
    # capturing group, the result alternates: text, separator, text, ...
    pieces = _reserved_chars_re.split(path)
    segments = pieces[0::2]
    # The final segment has no trailing separator; pad with an empty one.
    separators = pieces[1::2] + [""]

    # Upper-casing the separator normalizes %xx escapes (e.g. %2f -> %2F).
    return "".join(
        clean_func(segment) + separator.upper()
        for segment, separator in zip(segments, separators)
    )


def _clean_link(url: str) -> str:
    """Return ``url`` with its path component fully quoted.

    For example, a ' ' in the URL is replaced with "%20", while characters
    that are already quoted are not quoted a second time.

    :param url: The URL to clean.
    :return: The URL rebuilt with a cleaned path; all other components are
        passed through unchanged.
    """
    # Break the URL up following the general structure
    # `scheme://netloc/path;parameters?query#fragment`.
    parsed = urllib.parse.urlparse(url)
    # A URL without a netloc refers to a local filesystem path.
    cleaned_path = _clean_url_path(parsed.path, is_local_path=not parsed.netloc)
    return urllib.parse.urlunparse(parsed._replace(path=cleaned_path))


def _create_link_from_element(
    element_attribs: Dict[str, Optional[str]],
    page_url: str,
    base_url: str,
) -> Optional[Link]:
    """Build a Link from the attributes of an anchor element.

    The anchor is expected to come from a simple repository page.

    :param element_attribs: Attribute name to value mapping of the anchor.
    :param page_url: URL of the page the anchor was found on; recorded as
        the link's ``comes_from``.
    :param base_url: URL against which a relative ``href`` is resolved.
    :return: The resulting Link, or None when the anchor has no (or an
        empty) ``href``.
    """
    href = element_attribs.get("href")
    if not href:
        return None

    return Link(
        _clean_link(urllib.parse.urljoin(base_url, href)),
        comes_from=page_url,
        requires_python=element_attribs.get("data-requires-python"),
        yanked_reason=element_attribs.get("data-yanked"),
    )


class CacheablePageContent:
def __init__(self, page: "IndexContent") -> None:
assert page.cache_link_parsing
Expand Down Expand Up @@ -326,25 +235,10 @@ def parse_links(page: "IndexContent") -> Iterable[Link]:
if content_type_l.startswith("application/vnd.pypi.simple.v1+json"):
data = json.loads(page.content)
for file in data.get("files", []):
file_url = file.get("url")
if file_url is None:
link = Link.from_json(file, page.url)
if link is None:
continue

# The Link.yanked_reason expects an empty string instead of a boolean.
yanked_reason = file.get("yanked")
if yanked_reason and not isinstance(yanked_reason, str):
yanked_reason = ""
# The Link.yanked_reason expects None instead of False
elif not yanked_reason:
yanked_reason = None

yield Link(
_clean_link(urllib.parse.urljoin(page.url, file_url)),
comes_from=page.url,
requires_python=file.get("requires-python"),
yanked_reason=yanked_reason,
hashes=file.get("hashes", {}),
)
yield link
return

parser = HTMLLinkParser(page.url)
Expand All @@ -354,11 +248,7 @@ def parse_links(page: "IndexContent") -> Iterable[Link]:
url = page.url
base_url = parser.base_url or url
for anchor in parser.anchors:
link = _create_link_from_element(
anchor,
page_url=url,
base_url=base_url,
)
link = Link.from_element(anchor, page_url=url, base_url=base_url)
if link is None:
continue
yield link
Expand Down
22 changes: 22 additions & 0 deletions src/pip/_internal/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,25 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio
:param canonical_name: Normalized project name of the given wheel.
"""
return select_backend().Distribution.from_wheel(wheel, canonical_name)


def get_metadata_distribution(
    metadata_contents: bytes,
    filename: str,
    canonical_name: str,
) -> BaseDistribution:
    """Build a dist object from the raw contents of a METADATA file.

    The returned Distribution instance comes from the currently selected
    metadata backend and is sourced from the data in `metadata_contents`.

    :param metadata_contents: Contents of a METADATA file within a dist, or one
        served via PEP 658.
    :param filename: Filename for the dist this metadata represents.
    :param canonical_name: Normalized project name of the given dist.
    """
    distribution_cls = select_backend().Distribution
    return distribution_cls.from_metadata_file_contents(
        metadata_contents, filename, canonical_name
    )
18 changes: 18 additions & 0 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,24 @@ def from_directory(cls, directory: str) -> "BaseDistribution":
"""
raise NotImplementedError()

@classmethod
def from_metadata_file_contents(
    cls,
    metadata_contents: bytes,
    filename: str,
    project_name: str,
) -> "BaseDistribution":
    """Create a distribution from the raw contents of a METADATA file.

    PEP 658 support relies on this: it yields a "shallow" dist object that is
    sufficient for resolution, so the actual dist does not have to be
    downloaded or built yet.

    This base implementation only raises; backends are expected to override it.

    :param metadata_contents: The contents of a METADATA file.
    :param filename: File name for the dist with this metadata.
    :param project_name: Name of the project this dist represents.
    :raises NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()

@classmethod
def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution":
"""Load the distribution from a given wheel.
Expand Down
18 changes: 18 additions & 0 deletions src/pip/_internal/metadata/importlib/_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
)
from pip._internal.utils.misc import normalize_path
from pip._internal.utils.packaging import safe_extra
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file

from ._compat import BasePath, get_dist_name
Expand Down Expand Up @@ -109,6 +110,23 @@ def from_directory(cls, directory: str) -> BaseDistribution:
dist = importlib.metadata.Distribution.at(info_location)
return cls(dist, info_location, info_location.parent)

@classmethod
def from_metadata_file_contents(
    cls,
    metadata_contents: bytes,
    filename: str,
    project_name: str,
) -> BaseDistribution:
    """Create a distribution backed by a METADATA file written to a temp dir.

    The given bytes are written to a file named ``METADATA`` inside a freshly
    created temporary directory, and the returned distribution points at that
    directory.

    :param metadata_contents: The contents of a METADATA file.
    :param filename: File name for the dist with this metadata. Not used by
        this implementation; accepted for interface compatibility.
    :param project_name: Name of the project this dist represents. Not used
        by this implementation; accepted for interface compatibility.
    """
    # Generate temp dir to contain the metadata file, and write the file contents.
    # NOTE(review): globally_managed=True presumably defers cleanup of the
    # directory to pip's global temp-dir manager -- confirm against TempDirectory.
    temp_dir = pathlib.Path(
        TempDirectory(kind="metadata", globally_managed=True).path
    )
    metadata_path = temp_dir / "METADATA"
    metadata_path.write_bytes(metadata_contents)
    # Construct dist pointing to the newly created directory.
    dist = importlib.metadata.Distribution.at(metadata_path.parent)
    return cls(dist, metadata_path.parent, None)

@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
try:
Expand Down
23 changes: 20 additions & 3 deletions src/pip/_internal/metadata/pkg_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class EntryPoint(NamedTuple):
group: str


class WheelMetadata:
class InMemoryMetadata:
"""IMetadataProvider that reads metadata files from a dictionary.

This also maps metadata decoding exceptions to our internal exception type.
Expand Down Expand Up @@ -92,12 +92,29 @@ def from_directory(cls, directory: str) -> BaseDistribution:
dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
return cls(dist)

@classmethod
def from_metadata_file_contents(
    cls,
    metadata_contents: bytes,
    filename: str,
    project_name: str,
) -> BaseDistribution:
    """Create a distribution from in-memory METADATA file contents.

    The bytes are exposed to pkg_resources through an InMemoryMetadata
    provider holding a single "METADATA" entry.

    :param metadata_contents: The contents of a METADATA file.
    :param filename: File name for the dist with this metadata; used as the
        distribution's location and passed to the metadata provider.
    :param project_name: Name of the project this dist represents.
    """
    provider = InMemoryMetadata({"METADATA": metadata_contents}, filename)
    return cls(
        pkg_resources.DistInfoDistribution(
            location=filename,
            metadata=provider,
            project_name=project_name,
        )
    )

@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
try:
with wheel.as_zipfile() as zf:
info_dir, _ = parse_wheel(zf, name)
metadata_text = {
metadata_dict = {
path.split("/", 1)[-1]: read_wheel_metadata_file(zf, path)
for path in zf.namelist()
if path.startswith(f"{info_dir}/")
Expand All @@ -108,7 +125,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
dist = pkg_resources.DistInfoDistribution(
location=wheel.location,
metadata=WheelMetadata(metadata_text, wheel.location),
metadata=InMemoryMetadata(metadata_dict, wheel.location),
project_name=name,
)
return cls(dist)
Expand Down
Loading