Skip to content

Commit f19b20e

Browse files
chriskuehlradoering
authored andcommitted
perf: improve DependencyCache lru_cache hit rate, avoid iterating through lots of levels (#7950)
1 parent c301368 commit f19b20e

File tree

2 files changed

+111
-60
lines changed

2 files changed

+111
-60
lines changed

src/poetry/mixology/version_solver.py

+62-32
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import time
66

77
from typing import TYPE_CHECKING
8+
from typing import Optional
9+
from typing import Tuple
810

911
from poetry.core.packages.dependency import Dependency
1012

@@ -29,6 +31,11 @@
2931
_conflict = object()
3032

3133

34+
DependencyCacheKey = Tuple[
35+
str, Optional[str], Optional[str], Optional[str], Optional[str]
36+
]
37+
38+
3239
class DependencyCache:
3340
"""
3441
A cache of the valid dependencies.
@@ -40,36 +47,37 @@ class DependencyCache:
4047

4148
def __init__(self, provider: Provider) -> None:
4249
self._provider = provider
43-
self._cache: dict[
44-
int,
45-
dict[
46-
tuple[str, str | None, str | None, str | None, str | None],
47-
list[DependencyPackage],
48-
],
49-
] = collections.defaultdict(dict)
5050

51-
self.search_for = functools.lru_cache(maxsize=128)(self._search_for)
51+
# self._cache maps a package name to a stack of cached package lists,
52+
# ordered by the decision level which added them to the cache. This is
53+
# done so that when backtracking we can maintain cache entries from
54+
# previous decision levels, while clearing cache entries from only the
55+
# rolled back levels.
56+
#
57+
# In order to maintain the integrity of the cache, `clear_level()`
58+
# needs to be called in descending order as decision levels are
59+
# backtracked so that the correct items can be popped from the stack.
60+
self._cache: dict[DependencyCacheKey, list[list[DependencyPackage]]] = (
61+
collections.defaultdict(list)
62+
)
63+
self._cached_dependencies_by_level: dict[int, list[DependencyCacheKey]] = (
64+
collections.defaultdict(list)
65+
)
66+
67+
self._search_for_cached = functools.lru_cache(maxsize=128)(self._search_for)
5268

5369
def _search_for(
54-
self, dependency: Dependency, level: int
70+
self,
71+
dependency: Dependency,
72+
key: DependencyCacheKey,
5573
) -> list[DependencyPackage]:
56-
key = (
57-
dependency.complete_name,
58-
dependency.source_type,
59-
dependency.source_url,
60-
dependency.source_reference,
61-
dependency.source_subdirectory,
62-
)
63-
64-
for check_level in range(level, -1, -1):
65-
packages = self._cache[check_level].get(key)
66-
if packages is not None:
67-
packages = [
68-
p
69-
for p in packages
70-
if dependency.constraint.allows(p.package.version)
71-
]
72-
break
74+
cache_entries = self._cache[key]
75+
if cache_entries:
76+
packages = [
77+
p
78+
for p in cache_entries[-1]
79+
if dependency.constraint.allows(p.package.version)
80+
]
7381
else:
7482
packages = None
7583

@@ -83,12 +91,33 @@ def _search_for(
8391
if not packages:
8492
packages = self._provider.search_for(dependency)
8593

86-
self._cache[level][key] = packages
94+
return packages
95+
96+
def search_for(
97+
self,
98+
dependency: Dependency,
99+
decision_level: int,
100+
) -> list[DependencyPackage]:
101+
key = (
102+
dependency.complete_name,
103+
dependency.source_type,
104+
dependency.source_url,
105+
dependency.source_reference,
106+
dependency.source_subdirectory,
107+
)
108+
109+
packages = self._search_for_cached(dependency, key)
110+
if not self._cache[key] or self._cache[key][-1] is not packages:
111+
self._cache[key].append(packages)
112+
self._cached_dependencies_by_level[decision_level].append(key)
113+
87114
return packages
88115

89116
def clear_level(self, level: int) -> None:
90-
self.search_for.cache_clear()
91-
self._cache.pop(level, None)
117+
if level in self._cached_dependencies_by_level:
118+
self._search_for_cached.cache_clear()
119+
for key in self._cached_dependencies_by_level.pop(level):
120+
self._cache[key].pop()
92121

93122

94123
class VersionSolver:
@@ -327,9 +356,10 @@ def _resolve_conflict(self, incompatibility: Incompatibility) -> Incompatibility
327356
for level in range(
328357
self._solution.decision_level, previous_satisfier_level, -1
329358
):
330-
self._contradicted_incompatibilities.difference_update(
331-
self._contradicted_incompatibilities_by_level.pop(level, set()),
332-
)
359+
if level in self._contradicted_incompatibilities_by_level:
360+
self._contradicted_incompatibilities.difference_update(
361+
self._contradicted_incompatibilities_by_level.pop(level),
362+
)
333363
self._dependency_cache.clear_level(level)
334364

335365
self._solution.backtrack(previous_satisfier_level)

tests/mixology/version_solver/test_dependency_cache.py

+49-28
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,20 @@ def test_solver_dependency_cache_respects_source_type(
3030
add_to_repo(repo, "demo", "1.0.0")
3131

3232
cache = DependencyCache(provider)
33-
cache.search_for.cache_clear()
33+
cache._search_for_cached.cache_clear()
3434

3535
# ensure cache was never hit for both calls
3636
cache.search_for(dependency_pypi, 0)
3737
cache.search_for(dependency_git, 0)
38-
assert not cache.search_for.cache_info().hits
38+
assert not cache._search_for_cached.cache_info().hits
3939

4040
# increase test coverage by searching for copies
4141
# (when searching for the exact same object, __eq__ is never called)
4242
packages_pypi = cache.search_for(deepcopy(dependency_pypi), 0)
4343
packages_git = cache.search_for(deepcopy(dependency_git), 0)
4444

45-
assert cache.search_for.cache_info().hits == 2
46-
assert cache.search_for.cache_info().currsize == 2
45+
assert cache._search_for_cached.cache_info().hits == 2
46+
assert cache._search_for_cached.cache_info().currsize == 2
4747

4848
assert len(packages_pypi) == len(packages_git) == 1
4949
assert packages_pypi != packages_git
@@ -65,38 +65,59 @@ def test_solver_dependency_cache_pulls_from_prior_level_cache(
6565
root: ProjectPackage, provider: Provider, repo: Repository
6666
) -> None:
6767
dependency_pypi = Factory.create_dependency("demo", ">=0.1.0")
68+
dependency_pypi_constrained = Factory.create_dependency("demo", ">=0.1.0,<2.0.0")
6869
root.add_dependency(dependency_pypi)
70+
root.add_dependency(dependency_pypi_constrained)
6971
add_to_repo(repo, "demo", "1.0.0")
7072

7173
wrapped_provider = mock.Mock(wraps=provider)
7274
cache = DependencyCache(wrapped_provider)
73-
cache.search_for.cache_clear()
75+
cache._search_for_cached.cache_clear()
7476

75-
# On first call, provider.search_for() should be called and the level-0
76-
# cache populated.
77+
# On first call, provider.search_for() should be called and the cache
78+
# populated.
7779
cache.search_for(dependency_pypi, 0)
7880
assert len(wrapped_provider.search_for.mock_calls) == 1
79-
assert ("demo", None, None, None, None) in cache._cache[0]
80-
assert cache.search_for.cache_info().hits == 0
81-
assert cache.search_for.cache_info().misses == 1
82-
83-
# On second call at level 1, provider.search_for() should not be called
84-
# again and the level-1 cache should be populated from the level-0 cache.
81+
assert ("demo", None, None, None, None) in cache._cache
82+
assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[0]
83+
assert cache._search_for_cached.cache_info().hits == 0
84+
assert cache._search_for_cached.cache_info().misses == 1
85+
86+
# On second call at level 1, neither provider.search_for() nor
87+
# cache._search_for_cached() should have been called again, and the cache
88+
# should remain the same.
8589
cache.search_for(dependency_pypi, 1)
8690
assert len(wrapped_provider.search_for.mock_calls) == 1
87-
assert ("demo", None, None, None, None) in cache._cache[1]
88-
assert cache._cache[0] == cache._cache[1]
89-
assert cache.search_for.cache_info().hits == 0
90-
assert cache.search_for.cache_info().misses == 2
91-
92-
# Clearing the level 1 cache should invalidate the lru_cache on
93-
# cache.search_for and wipe out the level 1 cache while preserving the
91+
assert ("demo", None, None, None, None) in cache._cache
92+
assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[0]
93+
assert set(cache._cached_dependencies_by_level.keys()) == {0}
94+
assert cache._search_for_cached.cache_info().hits == 1
95+
assert cache._search_for_cached.cache_info().misses == 1
96+
97+
# On third call at level 2 with an updated constraint for the `demo`
98+
# package should not call provider.search_for(), but should call
99+
# cache._search_for_cached() and update the cache.
100+
cache.search_for(dependency_pypi_constrained, 2)
101+
assert len(wrapped_provider.search_for.mock_calls) == 1
102+
assert ("demo", None, None, None, None) in cache._cache
103+
assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[0]
104+
assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[2]
105+
assert set(cache._cached_dependencies_by_level.keys()) == {0, 2}
106+
assert cache._search_for_cached.cache_info().hits == 1
107+
assert cache._search_for_cached.cache_info().misses == 2
108+
109+
# Clearing the level 2 and level 1 caches should invalidate the lru_cache
110+
# on cache.search_for and wipe out the level 2 cache while preserving the
94111
# level 0 cache.
112+
cache.clear_level(2)
95113
cache.clear_level(1)
96-
assert set(cache._cache.keys()) == {0}
97-
assert ("demo", None, None, None, None) in cache._cache[0]
98-
assert cache.search_for.cache_info().hits == 0
99-
assert cache.search_for.cache_info().misses == 0
114+
cache.search_for(dependency_pypi, 0)
115+
assert len(wrapped_provider.search_for.mock_calls) == 1
116+
assert ("demo", None, None, None, None) in cache._cache
117+
assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[0]
118+
assert set(cache._cached_dependencies_by_level.keys()) == {0}
119+
assert cache._search_for_cached.cache_info().hits == 0
120+
assert cache._search_for_cached.cache_info().misses == 1
100121

101122

102123
def test_solver_dependency_cache_respects_subdirectories(
@@ -123,20 +144,20 @@ def test_solver_dependency_cache_respects_subdirectories(
123144
root.add_dependency(dependency_one_copy)
124145

125146
cache = DependencyCache(provider)
126-
cache.search_for.cache_clear()
147+
cache._search_for_cached.cache_clear()
127148

128149
# ensure cache was never hit for both calls
129150
cache.search_for(dependency_one, 0)
130151
cache.search_for(dependency_one_copy, 0)
131-
assert not cache.search_for.cache_info().hits
152+
assert not cache._search_for_cached.cache_info().hits
132153

133154
# increase test coverage by searching for copies
134155
# (when searching for the exact same object, __eq__ is never called)
135156
packages_one = cache.search_for(deepcopy(dependency_one), 0)
136157
packages_one_copy = cache.search_for(deepcopy(dependency_one_copy), 0)
137158

138-
assert cache.search_for.cache_info().hits == 2
139-
assert cache.search_for.cache_info().currsize == 2
159+
assert cache._search_for_cached.cache_info().hits == 2
160+
assert cache._search_for_cached.cache_info().currsize == 2
140161

141162
assert len(packages_one) == len(packages_one_copy) == 1
142163

0 commit comments

Comments
 (0)