Skip to content

Commit cf441e7

Browse files
radoering and MasterNayru
authored and committed
performance(legacy_repository): introduce link cache to improve performance for legacy repositories
Co-authored-by: Jarrod Moore <[email protected]>
1 parent 88ba18d commit cf441e7

File tree

3 files changed

+73
-37
lines changed

3 files changed

+73
-37
lines changed

src/poetry/repositories/link_sources/base.py

+21 -17
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
from __future__ import annotations
22

3+
import functools
34
import logging
45
import re
56

6-
from abc import abstractmethod
77
from typing import TYPE_CHECKING
88

99
from packaging.utils import canonicalize_name
@@ -15,6 +15,7 @@
1515

1616

1717
if TYPE_CHECKING:
18+
from collections import defaultdict
1819
from collections.abc import Iterator
1920

2021
from packaging.utils import NormalizedName
@@ -39,21 +40,16 @@ class LinkSource:
3940

4041
def __init__(self, url: str) -> None:
4142
self._url = url
43+
self._get_link_cache_wrapper = functools.lru_cache(maxsize=1)(
44+
self._get_link_cache
45+
)
4246

4347
@property
4448
def url(self) -> str:
4549
return self._url
4650

4751
def versions(self, name: str) -> Iterator[Version]:
48-
name = canonicalize_name(name)
49-
seen: set[Version] = set()
50-
51-
for link in self.links:
52-
pkg = self.link_package_data(link)
53-
54-
if pkg and pkg.name == name and pkg.version not in seen:
55-
seen.add(pkg.version)
56-
yield pkg.version
52+
yield from self._link_cache[canonicalize_name(name)]
5753

5854
@property
5955
def packages(self) -> Iterator[Package]:
@@ -64,9 +60,10 @@ def packages(self) -> Iterator[Package]:
6460
yield pkg
6561

6662
@property
67-
@abstractmethod
6863
def links(self) -> Iterator[Link]:
69-
raise NotImplementedError()
64+
for links_per_version in self._link_cache.values():
65+
for links in links_per_version.values():
66+
yield from links
7067

7168
@classmethod
7269
def link_package_data(cls, link: Link) -> Package | None:
@@ -102,11 +99,7 @@ def link_package_data(cls, link: Link) -> Package | None:
10299
def links_for_version(
103100
self, name: NormalizedName, version: Version
104101
) -> Iterator[Link]:
105-
for link in self.links:
106-
pkg = self.link_package_data(link)
107-
108-
if pkg and pkg.name == name and pkg.version == version:
109-
yield link
102+
yield from self._link_cache[name][version]
110103

111104
def clean_link(self, url: str) -> str:
112105
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
@@ -127,3 +120,14 @@ def yanked(self, name: NormalizedName, version: Version) -> str | bool:
127120
if reasons:
128121
return "\n".join(sorted(reasons))
129122
return True
123+
124+
@property
125+
def _link_cache(
126+
self,
127+
) -> defaultdict[NormalizedName, defaultdict[Version, list[Link]]]:
128+
return self._get_link_cache_wrapper()
129+
130+
def _get_link_cache(
131+
self,
132+
) -> defaultdict[NormalizedName, defaultdict[Version, list[Link]]]:
133+
raise NotImplementedError()

src/poetry/repositories/link_sources/html.py

+19 -4
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,20 @@
33
import urllib.parse
44
import warnings
55

6+
from collections import defaultdict
67
from html import unescape
78
from typing import TYPE_CHECKING
89

10+
from packaging.utils import canonicalize_name
911
from poetry.core.packages.utils.link import Link
12+
from poetry.core.semver.version import Version
1013

1114
from poetry.repositories.link_sources.base import LinkSource
1215

1316

1417
if TYPE_CHECKING:
15-
from collections.abc import Iterator
18+
from packaging.utils import NormalizedName
19+
1620

1721
with warnings.catch_warnings():
1822
warnings.simplefilter("ignore")
@@ -25,8 +29,12 @@ def __init__(self, url: str, content: str) -> None:
2529

2630
self._parsed = html5lib.parse(content, namespaceHTMLElements=False)
2731

28-
@property
29-
def links(self) -> Iterator[Link]:
32+
def _get_link_cache(
33+
self,
34+
) -> defaultdict[NormalizedName, defaultdict[Version, list[Link]]]:
35+
links: defaultdict[
36+
NormalizedName, defaultdict[Version, list[Link]]
37+
] = defaultdict(lambda: defaultdict(list))
3038
for anchor in self._parsed.findall(".//a"):
3139
if anchor.get("href"):
3240
href = anchor.get("href")
@@ -44,7 +52,14 @@ def links(self) -> Iterator[Link]:
4452
if link.ext not in self.SUPPORTED_FORMATS:
4553
continue
4654

47-
yield link
55+
pkg = self.link_package_data(link)
56+
if pkg:
57+
links[pkg.name][pkg.version].append(link)
58+
else:
59+
# dummy name and version
60+
links[canonicalize_name("")][Version.parse("0")].append(link)
61+
62+
return links
4863

4964

5065
class SimpleRepositoryPage(HTMLPage):

tests/repositories/link_sources/test_base.py

+33 -16
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
from __future__ import annotations
22

3+
from collections import defaultdict
34
from typing import TYPE_CHECKING
4-
from unittest.mock import PropertyMock
55

66
import pytest
77

8+
from packaging.utils import NormalizedName
89
from packaging.utils import canonicalize_name
910
from poetry.core.packages.package import Package
1011
from poetry.core.packages.utils.link import Link
@@ -22,21 +23,37 @@
2223
@pytest.fixture
2324
def link_source(mocker: MockerFixture) -> LinkSource:
2425
url = "https://example.org"
25-
link_source = LinkSource(url)
26-
mocker.patch(
27-
f"{LinkSource.__module__}.{LinkSource.__qualname__}.links",
28-
new_callable=PropertyMock,
29-
return_value=iter(
30-
[
31-
Link(f"{url}/demo-0.1.0.tar.gz"),
32-
Link(f"{url}/demo-0.1.0_invalid.tar.gz"),
33-
Link(f"{url}/invalid.tar.gz"),
34-
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
35-
Link(f"{url}/demo-0.1.1.tar.gz"),
36-
]
37-
),
38-
)
39-
return link_source
26+
27+
class LinkSourceMock(LinkSource):
28+
def _get_link_cache(
29+
self,
30+
) -> defaultdict[NormalizedName, defaultdict[Version, list[Link]]]:
31+
return defaultdict(
32+
lambda: defaultdict(list),
33+
{
34+
canonicalize_name("demo"): defaultdict(
35+
list,
36+
{
37+
Version.parse("0.1.0"): [
38+
Link(f"{url}/demo-0.1.0.tar.gz"),
39+
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
40+
],
41+
Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")],
42+
},
43+
),
44+
canonicalize_name(""): defaultdict(
45+
list,
46+
{
47+
Version.parse("0"): [
48+
Link(f"{url}/demo-0.1.0_invalid.tar.gz"),
49+
Link(f"{url}/invalid.tar.gz"),
50+
]
51+
},
52+
),
53+
},
54+
)
55+
56+
return LinkSourceMock(url)
4057

4158

4259
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)