Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resolvelib: Stop resolving requirements one-by-one #488

Merged
merged 22 commits into from
Jan 28, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b7d46c8
resolvelib: Stop resolving requirements one-by-one
tetsuo-cpp Jan 17, 2023
90e17cf
requirement, resolvelib: Propagate origin requirements for each candi…
tetsuo-cpp Jan 18, 2023
20d16ee
requirement: Use the new origin requirements field
tetsuo-cpp Jan 18, 2023
1f7b15d
Merge branch 'main' into alex/dep-resolution-fix
tetsuo-cpp Jan 18, 2023
8e65f89
Merge remote-tracking branch 'origin/main' into alex/dep-resolution-fix
tetsuo-cpp Jan 25, 2023
9517b8d
requirement: Fix `--skip-editable` flag
tetsuo-cpp Jan 25, 2023
7556eb6
Merge remote-tracking branch 'origin/alex/pip-requirements-parser-fix…
tetsuo-cpp Jan 25, 2023
3e396a6
resolvelib: Fix lint
tetsuo-cpp Jan 25, 2023
74cde16
Merge remote-tracking branch 'origin/main' into alex/dep-resolution-fix
tetsuo-cpp Jan 26, 2023
d547d05
test: Remove incorrect online annotation
tetsuo-cpp Jan 26, 2023
5eeef40
requirement, resolvelib: Rename dependee map variables/functions
tetsuo-cpp Jan 26, 2023
4935dd0
test: Get a few resolvelib tests working
tetsuo-cpp Jan 26, 2023
92bc864
resolvelib: Return a dummy candidate when a project can't be found on…
tetsuo-cpp Jan 27, 2023
9dd0fbf
test: Get tests passing
tetsuo-cpp Jan 27, 2023
f0d6b5d
requirement, resolvelib: Fix lint
tetsuo-cpp Jan 27, 2023
894168e
resolvelib: Document new classes
tetsuo-cpp Jan 27, 2023
e16fcd9
Merge branch 'main' into alex/dep-resolution-fix
tetsuo-cpp Jan 27, 2023
2912a2f
CHANGELOG: Add changelog entry
tetsuo-cpp Jan 27, 2023
8709ef6
dependency_source: Mark the interface with "no cover"
tetsuo-cpp Jan 27, 2023
2e4079b
test: Fill out remaining coverage
tetsuo-cpp Jan 28, 2023
a2d8cf3
Merge branch 'main' into alex/dep-resolution-fix
tetsuo-cpp Jan 28, 2023
cecbb6c
requirement: Use `pass` statement to check for coverage and mark
tetsuo-cpp Jan 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 4 additions & 16 deletions pip_audit/_dependency_source/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,32 +61,20 @@ class DependencyFixError(Exception):

class DependencyResolver(ABC):
"""
Represents an abstract resolver of Python dependencies that takes a single
dependency and returns all of its transitive dependencies.
Represents an abstract resolver of Python dependencies that takes a list of
dependencies and returns all of their transitive dependencies.

Concrete dependency sources may use a resolver as part of their
implementation.
"""

@abstractmethod
def resolve(self, req: Requirement) -> list[Dependency]: # pragma: no cover
def resolve(self, reqs: list[Requirement]) -> list[Dependency]:
"""
Resolve a single `Requirement` into a list of `Dependency` instances.
Resolve a list of `Requirement`s into a list of resolved `Dependency`s.
"""
raise NotImplementedError

def resolve_all(
self, reqs: Iterator[Requirement]
) -> Iterator[tuple[Requirement, list[Dependency]]]:
"""
Resolve a collection of `Requirement`s into their respective `Dependency` sets.

`DependencyResolver` implementations can override this implementation with
a more optimized one.
"""
for req in reqs:
yield (req, self.resolve(req))


class DependencyResolverError(Exception):
"""
Expand Down
50 changes: 23 additions & 27 deletions pip_audit/_dependency_source/requirement.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def __init__(
self._require_hashes = require_hashes
self._no_deps = no_deps
self.state = state
self._dep_cache: dict[Path, dict[Requirement, set[Dependency]]] = {}
self._dep_cache: dict[Path, set[Dependency]] = {}

def collect(self) -> Iterator[Dependency]:
"""
Expand Down Expand Up @@ -113,7 +113,7 @@ def collect(self) -> Iterator[Dependency]:
req_names.add(req.name)
reqs.append(req)

for _, dep in self._collect_cached_deps(filename, reqs):
for dep in self._collect_cached_deps(filename, reqs):
if dep in collected:
continue
collected.add(dep)
Expand Down Expand Up @@ -205,9 +205,12 @@ def _fix_file(self, filename: Path, fix_version: ResolvedFixVersion) -> None:
r for r in reqs if isinstance(r, InstallRequirement)
]
origin_reqs: set[Requirement] = set()
for req, dep in self._collect_cached_deps(filename, list(installed_reqs)):
for dep in self._collect_cached_deps(filename, list(installed_reqs)):
# NOTE(alex): Now we can't map requirements to dependencies, this is broken.
# We'll have to figure out to reconstruct this information.
if fix_version.dep == dep:
origin_reqs.add(req)
# origin_reqs.add(req)
pass
if origin_reqs:
logger.warning(
"added fixed subdependency explicitly to requirements file "
Expand Down Expand Up @@ -292,19 +295,17 @@ def _build_hash_options_mapping(self, hash_options: list[str]) -> dict[str, list

def _collect_cached_deps(
self, filename: Path, reqs: list[InstallRequirement]
) -> Iterator[tuple[Requirement, Dependency]]:
) -> Iterator[Dependency]:
"""
Collect resolved dependencies for a given requirements file, retrieving them from the
dependency cache if possible.
"""
# See if we've already have cached dependencies for this file
cached_deps_for_file = self._dep_cache.get(filename, None)
if cached_deps_for_file is not None:
for req, deps in cached_deps_for_file.items():
for dep in deps:
yield req, dep
yield from cached_deps_for_file

new_cached_deps_for_file: dict[Requirement, set[Dependency]] = dict()
new_cached_deps_for_file: set[Dependency] = set()

# There are three cases where we skip dependency resolution:
#
Expand All @@ -318,27 +319,22 @@ def _collect_cached_deps(
for req, dep in self._collect_preresolved_deps(
iter(reqs), require_hashes=require_hashes
):
if req not in new_cached_deps_for_file:
new_cached_deps_for_file[req] = set()
new_cached_deps_for_file[req].add(dep)
yield req, dep
new_cached_deps_for_file.add(dep)
yield dep
else:
# Invoke the dependency resolver to turn requirements into dependencies
req_values: list[Requirement] = [r.req for r in reqs]
for req, resolved_deps in self._resolver.resolve_all(iter(req_values)):
for dep in resolved_deps:
if req not in new_cached_deps_for_file:
new_cached_deps_for_file[req] = set()
new_cached_deps_for_file[req].add(dep)

if dep.is_skipped(): # pragma: no cover
dep = cast(SkippedDependency, dep)
self.state.update_state(f"Skipping {dep.name}: {dep.skip_reason}")
else:
dep = cast(ResolvedDependency, dep)
self.state.update_state(f"Collecting {dep.name} ({dep.version})")

yield req, dep
for dep in self._resolver.resolve(req_values):
new_cached_deps_for_file.add(dep)

if dep.is_skipped(): # pragma: no cover
dep = cast(SkippedDependency, dep)
self.state.update_state(f"Skipping {dep.name}: {dep.skip_reason}")
else:
dep = cast(ResolvedDependency, dep)
self.state.update_state(f"Collecting {dep.name} ({dep.version})")

yield dep

# Cache the collected dependencies
self._dep_cache[filename] = new_cached_deps_for_file
Expand Down
46 changes: 27 additions & 19 deletions pip_audit/_dependency_source/resolvelib/pypi_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from resolvelib.resolvers import RequirementInformation

from pip_audit._cache import caching_session
from pip_audit._service import SkippedDependency
from pip_audit._state import AuditState
from pip_audit._util import python_version
from pip_audit._virtual_env import VirtualEnv, VirtualEnvError
Expand Down Expand Up @@ -327,6 +328,7 @@ def __init__(
self.timeout = timeout
self.session = caching_session(cache_dir, use_pip=True)
self._state = state
self.skip_deps: list[SkippedDependency] = []

def identify(self, requirement_or_candidate: Requirement | Candidate) -> str:
"""
Expand Down Expand Up @@ -371,25 +373,31 @@ def find_matches(
# Need to pass the extras to the search, so they
# are added to the candidate at creation - we
# treat candidates as immutable once created.
candidates = sorted(
[
candidate
for candidate in get_project_from_indexes(
self.index_urls, self.session, identifier, extras, self.timeout, self._state
)
if candidate.version not in bad_versions
and all(candidate.version in r.specifier for r in requirements)
# HACK(ww): Additionally check that each candidate's name matches the
# expected project name (identifier).
# This technically shouldn't be required, but parsing distribution names
# from package indices is imprecise/unreliable when distribution filenames
# are PEP 440 compliant but not normalized.
# See: https://github.com/pypa/packaging/issues/527
and candidate.name == identifier
],
key=attrgetter("version", "is_wheel"),
reverse=True,
)
try:
candidates = sorted(
[
candidate
for candidate in get_project_from_indexes(
self.index_urls, self.session, identifier, extras, self.timeout, self._state
)
if candidate.version not in bad_versions
and all(candidate.version in r.specifier for r in requirements)
# HACK(ww): Additionally check that each candidate's name matches the
# expected project name (identifier).
# This technically shouldn't be required, but parsing distribution names
# from package indices is imprecise/unreliable when distribution filenames
# are PEP 440 compliant but not normalized.
# See: https://github.com/pypa/packaging/issues/527
and candidate.name == identifier
],
key=attrgetter("version", "is_wheel"),
reverse=True,
)
except PyPINotFoundError as e:
skip_reason = str(e)
logger.debug(skip_reason)
self.skip_deps.append(SkippedDependency(name=identifier, skip_reason=skip_reason))
return
Copy link
Contributor Author

@tetsuo-cpp tetsuo-cpp Jan 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@woodruffw I just found a serious problem here as I was fixing unit tests. The fact that we record the skipped candidate is fine, but then we proceed to return 0 candidates for a given requirement. This causes the resolver to raise a ResolutionImpossible.

In the past, we just skipped the top-level requirement and then try resolving the next one (since it was doing one at a time). But the resolver is "all or nothing" so we can't really get partial results from it. And once we get into find_matches, I don't think there's a way to backtrack and say "nevermind, just forget about that requirement I said I needed earlier". I'm at a bit of a loss figuring out how to fix this.

Do you have any ideas? I was considering doing something like creating a dummy candidate without any dependencies just to get past the resolver.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is reminding me why I thought "one at a time" made sense despite not accurately reflecting the actual dependency resolution process 😅

IMO a dummy candidate here is fine, but we should probably tread carefully -- it'd be good to have it be a separate type entirely similarly to how we handle SkippedDependency (SkippedCandidate maybe?).


# If we have multiple candidates for a single version and some are wheels,
# yield only the wheels. This keeps us from wasting a large amount of
Expand Down
39 changes: 23 additions & 16 deletions pip_audit/_dependency_source/resolvelib/resolvelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pip_audit._service.interface import Dependency, ResolvedDependency, SkippedDependency
from pip_audit._state import AuditState

from .pypi_provider import PyPINotFoundError, PyPIProvider
from .pypi_provider import PyPIProvider

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -58,29 +58,36 @@ def __init__(
self.resolver: Resolver = Resolver(self.provider, self.reporter)
self._skip_editable = skip_editable

def resolve(self, req: Requirement) -> list[Dependency]:
def resolve(self, reqs: list[Requirement]) -> list[Dependency]:
"""
Resolve the given `Requirement` into a `Dependency` list.
"""

# HACK: `resolve` takes both `packaging.Requirement` and `pip_api.Requirement`,
# since the latter is a subclass. But only the latter knows whether the
# requirement is editable, so we need to check for it here.
if isinstance(req, ParsedRequirement):
if req.editable and self._skip_editable:
return [
SkippedDependency(name=req.name, skip_reason="requirement marked as editable")
]

deps: list[Dependency] = []

reqs_to_resolve: list[Requirement] = []
for req in reqs:
# HACK: `resolve` takes both `packaging.Requirement` and `pip_api.Requirement`,
# since the latter is a subclass. But only the latter knows whether the
# requirement is editable, so we need to check for it here.
if isinstance(req, ParsedRequirement):
if req.editable and self._skip_editable:
deps.append(
SkippedDependency(
name=req.name, skip_reason="requirement marked as editable"
)
)
continue
reqs_to_resolve.append(req)

try:
result = self.resolver.resolve([req])
except PyPINotFoundError as e:
skip_reason = str(e)
logger.debug(skip_reason)
return [SkippedDependency(name=req.name, skip_reason=skip_reason)]
result = self.resolver.resolve(reqs_to_resolve)
except HTTPError as e:
raise ResolveLibResolverError("failed to resolve dependencies") from e

# If the provider encountered any dependencies it couldn't find on PyPI, they'll be here.
deps.extend(self.provider.skip_deps)

for name, candidate in result.mapping.items():
deps.append(ResolvedDependency(name, candidate.version))
return deps
Expand Down