Skip to content

Commit

Permalink
resolvelib: Stop resolving requirements one-by-one (#488)
Browse files Browse the repository at this point in the history
* resolvelib: Stop resolving requirements one-by-one

* requirement, resolvelib: Propagate origin requirements for each candidate

* requirement: Use the new origin requirements field

* requirement: Fix `--skip-editable` flag

* resolvelib: Fix lint

* test: Remove incorrect online annotation

* requirement, resolvelib: Rename dependee map variables/functions

* test: Get a few resolvelib tests working

* resolvelib: Return a dummy candidate when a project can't be found on PyPI

* test: Get tests passing

* requirement, resolvelib: Fix lint

* resolvelib: Document new classes

* CHANGELOG: Add changelog entry

* dependency_source: Mark the interface with "no cover"

* test: Fill out remaining coverage

* requirement: Use `pass` statement to check for coverage and mark
`continue` with `no cover`
  • Loading branch information
tetsuo-cpp authored Jan 28, 2023
1 parent ab078a5 commit d549edc
Show file tree
Hide file tree
Showing 10 changed files with 481 additions and 236 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ All versions prior to 0.0.9 are untracked.
with an internal API change
([#499](https://github.com/pypa/pip-audit/pull/499))

* Fixed a dependency resolution bug that can potentially be triggered when
multiple packages have the same subdependency
([#488](https://github.com/pypa/pip-audit/pull/488))

## [2.4.14]

### Fixed
Expand Down
20 changes: 4 additions & 16 deletions pip_audit/_dependency_source/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,34 +157,22 @@ def supported_algorithms(self, req_name: str) -> list[str]:

class DependencyResolver(ABC):
"""
Represents an abstract resolver of Python dependencies that takes a single
dependency and returns all of its transitive dependencies.
Represents an abstract resolver of Python dependencies that takes a list of
dependencies and returns all of their transitive dependencies.
Concrete dependency sources may use a resolver as part of their
implementation.
"""

@abstractmethod
def resolve(
self, req: Requirement, req_hashes: RequirementHashes
self, reqs: list[Requirement], req_hashes: RequirementHashes
) -> list[Dependency]: # pragma: no cover
"""
Resolve a single `Requirement` into a list of `Dependency` instances.
Resolve a list of `Requirement`s into a list of resolved `Dependency`s.
"""
raise NotImplementedError

def resolve_all(
self, reqs: Iterator[Requirement], req_hashes: RequirementHashes
) -> Iterator[tuple[Requirement, list[Dependency]]]:
"""
Resolve a collection of `Requirement`s into their respective `Dependency` sets.
`DependencyResolver` implementations can override this implementation with
a more optimized one.
"""
for req in reqs:
yield (req, self.resolve(req, req_hashes))


class DependencyResolverError(Exception):
"""
Expand Down
25 changes: 10 additions & 15 deletions pip_audit/_dependency_source/pyproject.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,16 @@ def collect(self) -> Iterator[Dependency]:
reqs: list[Requirement] = [Requirement(dep) for dep in deps]
req_hashes = RequirementHashes()
try:
for _, deps in self.resolver.resolve_all(iter(reqs), req_hashes):
for dep in deps:
# Don't allow duplicate dependencies to be returned
if dep in collected:
continue

if dep.is_skipped(): # pragma: no cover
dep = cast(SkippedDependency, dep)
self.state.update_state(f"Skipping {dep.name}: {dep.skip_reason}")
else:
dep = cast(ResolvedDependency, dep)
self.state.update_state(f"Collecting {dep.name} ({dep.version})")

collected.add(dep)
yield dep
for dep in self.resolver.resolve(reqs, req_hashes):
if dep.is_skipped(): # pragma: no cover
dep = cast(SkippedDependency, dep)
self.state.update_state(f"Skipping {dep.name}: {dep.skip_reason}")
else:
dep = cast(ResolvedDependency, dep)
self.state.update_state(f"Collecting {dep.name} ({dep.version})")

collected.add(dep)
yield dep
except DependencyResolverError as dre:
raise PyProjectSourceError("dependency resolver raised an error") from dre

Expand Down
129 changes: 77 additions & 52 deletions pip_audit/_dependency_source/requirement.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import re
import shutil
from contextlib import ExitStack
from dataclasses import dataclass, field
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import IO, Iterator, cast
Expand Down Expand Up @@ -76,15 +77,15 @@ def __init__(
self._no_deps = no_deps
self._skip_editable = skip_editable
self.state = state
self._dep_cache: dict[Path, dict[Requirement, set[Dependency]]] = {}
self._dep_cache: dict[Path, set[Dependency]] = {}

def collect(self) -> Iterator[Dependency]:
"""
Collect all of the dependencies discovered by this `RequirementSource`.
Raises a `RequirementSourceError` on any errors.
"""
collected: set[Dependency] = set()
collected: dict[str, Dependency] = dict()
for filename in self._filenames:
try:
rf = RequirementsFile.from_file(filename)
Expand Down Expand Up @@ -123,14 +124,42 @@ def collect(self) -> Iterator[Dependency]:
req_names.add(req.name)
reqs.append(req)

for _, dep in self._collect_cached_deps(filename, reqs):
if dep in collected:
for dep in self._collect_cached_deps(filename, reqs):
if dep.canonical_name in collected:
existing_dep = collected[dep.canonical_name]
if isinstance(dep, SkippedDependency) or isinstance(
existing_dep, SkippedDependency
):
# The `continue` statement is incorrectly flagged as uncovered for
# Python <= 3.9.
#
# Let's add a `pass` here as a way to make sure this branch gets tested
# and then mark the `continue` with `no cover`.
#
# See: https://github.com/pytest-dev/pytest-cov/issues/546
pass
continue # pragma: no cover

dep = cast(RequirementDependency, dep)
existing_dep = cast(RequirementDependency, existing_dep)

# If we have the same dependency generated from multiple files, we need to
# merge the dependee requirements.
combined_dep = RequirementDependency(
name=dep.name,
version=dep.version,
dependee_reqs=(dep.dependee_reqs | existing_dep.dependee_reqs),
)

collected[dep.canonical_name] = combined_dep
continue
collected.add(dep)
yield dep

collected[dep.canonical_name] = dep
except DependencyResolverError as dre:
raise RequirementSourceError(str(dre))

yield from collected.values()

def fix(self, fix_version: ResolvedFixVersion) -> None:
"""
Fixes a dependency version for this `RequirementSource`.
Expand Down Expand Up @@ -209,30 +238,24 @@ def _fix_file(self, filename: Path, fix_version: ResolvedFixVersion) -> None:
# To know whether this is the case, we'll need to resolve dependencies if we haven't
# already in order to figure out whether this subdependency belongs to this file or
# another.
try:
if not fixed:
installed_reqs: list[InstallRequirement] = [
r for r in reqs if isinstance(r, InstallRequirement)
]
origin_reqs: set[Requirement] = set()
for req, dep in self._collect_cached_deps(filename, list(installed_reqs)):
if fix_version.dep == dep:
origin_reqs.add(req)
if origin_reqs:
logger.warning(
"added fixed subdependency explicitly to requirements file "
f"{filename}: {fix_version.dep.canonical_name}"
)
origin_reqs_formatted = ",".join(
[str(req) for req in sorted(list(origin_reqs), key=lambda x: x.name)]
)
print(
f" # pip-audit: subdependency fixed via {origin_reqs_formatted}",
file=f,
)
print(f"{fix_version.dep.canonical_name}=={fix_version.version}", file=f)
except DependencyResolverError as dre:
raise RequirementFixError(str(dre)) from dre
if not fixed:
req_dep = cast(RequirementDependency, fix_version.dep)
if req_dep.dependee_reqs:
logger.warning(
"added fixed subdependency explicitly to requirements file "
f"{filename}: {fix_version.dep.canonical_name}"
)
dependee_reqs_formatted = ",".join(
[
str(req)
for req in sorted(list(req_dep.dependee_reqs), key=lambda x: x.name)
]
)
print(
f" # pip-audit: subdependency fixed via {dependee_reqs_formatted}",
file=f,
)
print(f"{fix_version.dep.canonical_name}=={fix_version.version}", file=f)

def _recover_files(self, tmp_files: list[IO[str]]) -> None:
for (filename, tmp_file) in zip(self._filenames, tmp_files):
Expand Down Expand Up @@ -274,7 +297,7 @@ def _collect_preresolved_deps(
f"requirement {req.name} is not pinned to an exact version: {str(req)}"
)

yield req.req, ResolvedDependency(
yield req.req, RequirementDependency(
req.name, Version(pinned_specifier.group("version"))
)

Expand All @@ -293,27 +316,23 @@ def _build_hash_options_mapping(self, hash_options: list[str]) -> dict[str, list

def _collect_cached_deps(
self, filename: Path, reqs: list[InstallRequirement]
) -> Iterator[tuple[Requirement, Dependency]]:
) -> Iterator[Dependency]:
"""
Collect resolved dependencies for a given requirements file, retrieving them from the
dependency cache if possible.
"""
# See if we've already have cached dependencies for this file
cached_deps_for_file = self._dep_cache.get(filename, None)
if cached_deps_for_file is not None:
for req, deps in cached_deps_for_file.items():
for dep in deps:
yield req, dep
yield from cached_deps_for_file

new_cached_deps_for_file: dict[Requirement, set[Dependency]] = dict()
new_cached_deps_for_file: set[Dependency] = set()

# Skip dependency resolution if the user has specified `--no-deps`
if self._no_deps:
for req, dep in self._collect_preresolved_deps(iter(reqs)):
if req not in new_cached_deps_for_file:
new_cached_deps_for_file[req] = set()
new_cached_deps_for_file[req].add(dep)
yield req, dep
new_cached_deps_for_file.add(dep)
yield dep
else:
require_hashes = self._require_hashes or any(req.hash_options for req in reqs)
req_hashes = RequirementHashes()
Expand All @@ -331,20 +350,17 @@ def _collect_cached_deps(

# Invoke the dependency resolver to turn requirements into dependencies
req_values: list[Requirement] = [r.req for r in reqs]
for req, resolved_deps in self._resolver.resolve_all(iter(req_values), req_hashes):
for dep in resolved_deps:
if req not in new_cached_deps_for_file:
new_cached_deps_for_file[req] = set()
new_cached_deps_for_file[req].add(dep)
for dep in self._resolver.resolve(req_values, req_hashes):
new_cached_deps_for_file.add(dep)

if dep.is_skipped(): # pragma: no cover
dep = cast(SkippedDependency, dep)
self.state.update_state(f"Skipping {dep.name}: {dep.skip_reason}")
else:
dep = cast(ResolvedDependency, dep)
self.state.update_state(f"Collecting {dep.name} ({dep.version})")
if dep.is_skipped(): # pragma: no cover
dep = cast(SkippedDependency, dep)
self.state.update_state(f"Skipping {dep.name}: {dep.skip_reason}")
else:
dep = cast(ResolvedDependency, dep)
self.state.update_state(f"Collecting {dep.name} ({dep.version})")

yield req, dep
yield dep

# Cache the collected dependencies
self._dep_cache[filename] = new_cached_deps_for_file
Expand All @@ -360,3 +376,12 @@ class RequirementFixError(DependencyFixError):
"""A requirements-fixing specific `DependencyFixError`."""

pass


@dataclass(frozen=True)
class RequirementDependency(ResolvedDependency):
"""
Represents a fully resolved Python package from a requirements file.
"""

dependee_reqs: set[Requirement] = field(default_factory=set, hash=False)
Loading

0 comments on commit d549edc

Please sign in to comment.