Skip to content

Commit ae57555

Browse files
committed
fix: performance regression when parsing links from legacy repositories (python-poetry#6442)
Resolves: python-poetry#6436

Measurements of `poetry lock` with a warm cache, using the example pyproject.toml from python-poetry#6436:

| test case | time in s | peak memory usage in MB |
|---|---|---|
| legacy repository (before) | 422 | 113 |
| legacy repository (after) | 3 | 118 |
| pypi repository | 1 | 92 |

`backports.cached-property` is used in order to support `cached_property` on Python 3.7.

Co-authored-by: Jarrod Moore <[email protected]>
Co-authored-by: Bjorn Neergaard <[email protected]>

(cherry picked from commit c4b2253)
1 parent 0f385cb commit ae57555

File tree

8 files changed

+77
-50
lines changed

8 files changed

+77
-50
lines changed

poetry.lock

+13-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ python = "^3.7"
4646

4747
poetry-core = "1.1.0"
4848
poetry-plugin-export = "^1.0.6"
49+
"backports.cached-property" = { version = "^1.0.2", python = "<3.8" }
4950
cachecontrol = { version = "^0.12.9", extras = ["filecache"] }
5051
cachy = "^0.3.0"
5152
cleo = "^1.0.0a5"

src/poetry/repositories/link_sources/base.py

+15-18
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
import logging
44
import re
55

6-
from abc import abstractmethod
76
from typing import TYPE_CHECKING
7+
from typing import DefaultDict
8+
from typing import List
89

910
from packaging.utils import canonicalize_name
1011
from poetry.core.packages.package import Package
1112
from poetry.core.semver.version import Version
1213

14+
from poetry.utils._compat import cached_property
1315
from poetry.utils.patterns import sdist_file_re
1416
from poetry.utils.patterns import wheel_file_re
1517

@@ -20,6 +22,8 @@
2022
from packaging.utils import NormalizedName
2123
from poetry.core.packages.utils.link import Link
2224

25+
LinkCache = DefaultDict[NormalizedName, DefaultDict[Version, List[Link]]]
26+
2327

2428
logger = logging.getLogger(__name__)
2529

@@ -44,16 +48,8 @@ def __init__(self, url: str) -> None:
4448
def url(self) -> str:
4549
return self._url
4650

47-
def versions(self, name: str) -> Iterator[Version]:
48-
name = canonicalize_name(name)
49-
seen: set[Version] = set()
50-
51-
for link in self.links:
52-
pkg = self.link_package_data(link)
53-
54-
if pkg and pkg.name == name and pkg.version not in seen:
55-
seen.add(pkg.version)
56-
yield pkg.version
51+
def versions(self, name: NormalizedName) -> Iterator[Version]:
52+
yield from self._link_cache[name]
5753

5854
@property
5955
def packages(self) -> Iterator[Package]:
@@ -64,9 +60,10 @@ def packages(self) -> Iterator[Package]:
6460
yield pkg
6561

6662
@property
67-
@abstractmethod
6863
def links(self) -> Iterator[Link]:
69-
raise NotImplementedError()
64+
for links_per_version in self._link_cache.values():
65+
for links in links_per_version.values():
66+
yield from links
7067

7168
@classmethod
7269
def link_package_data(cls, link: Link) -> Package | None:
@@ -102,11 +99,7 @@ def link_package_data(cls, link: Link) -> Package | None:
10299
def links_for_version(
103100
self, name: NormalizedName, version: Version
104101
) -> Iterator[Link]:
105-
for link in self.links:
106-
pkg = self.link_package_data(link)
107-
108-
if pkg and pkg.name == name and pkg.version == version:
109-
yield link
102+
yield from self._link_cache[name][version]
110103

111104
def clean_link(self, url: str) -> str:
112105
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
@@ -127,3 +120,7 @@ def yanked(self, name: NormalizedName, version: Version) -> str | bool:
127120
if reasons:
128121
return "\n".join(sorted(reasons))
129122
return True
123+
124+
@cached_property
125+
def _link_cache(self) -> LinkCache:
126+
raise NotImplementedError()

src/poetry/repositories/link_sources/html.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,19 @@
33
import urllib.parse
44
import warnings
55

6+
from collections import defaultdict
67
from html import unescape
78
from typing import TYPE_CHECKING
89

910
from poetry.core.packages.utils.link import Link
1011

1112
from poetry.repositories.link_sources.base import LinkSource
13+
from poetry.utils._compat import cached_property
1214

1315

1416
if TYPE_CHECKING:
15-
from collections.abc import Iterator
17+
from poetry.repositories.link_sources.base import LinkCache
18+
1619

1720
with warnings.catch_warnings():
1821
warnings.simplefilter("ignore")
@@ -25,8 +28,9 @@ def __init__(self, url: str, content: str) -> None:
2528

2629
self._parsed = html5lib.parse(content, namespaceHTMLElements=False)
2730

28-
@property
29-
def links(self) -> Iterator[Link]:
31+
@cached_property
32+
def _link_cache(self) -> LinkCache:
33+
links: LinkCache = defaultdict(lambda: defaultdict(list))
3034
for anchor in self._parsed.findall(".//a"):
3135
if anchor.get("href"):
3236
href = anchor.get("href")
@@ -44,7 +48,11 @@ def links(self) -> Iterator[Link]:
4448
if link.ext not in self.SUPPORTED_FORMATS:
4549
continue
4650

47-
yield link
51+
pkg = self.link_package_data(link)
52+
if pkg:
53+
links[pkg.name][pkg.version].append(link)
54+
55+
return links
4856

4957

5058
class SimpleRepositoryPage(HTMLPage):

src/poetry/utils/_compat.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@
1313
else:
1414
from importlib import metadata
1515

16+
if sys.version_info < (3, 8):
17+
# compatibility for python <3.8
18+
from backports.cached_property import cached_property
19+
else:
20+
from functools import cached_property
21+
1622
WINDOWS = sys.platform == "win32"
1723

1824

@@ -53,4 +59,12 @@ def list_to_shell_command(cmd: list[str]) -> str:
5359
)
5460

5561

56-
__all__ = ["WINDOWS", "decode", "encode", "list_to_shell_command", "metadata", "to_str"]
62+
__all__ = [
63+
"WINDOWS",
64+
"cached_property",
65+
"decode",
66+
"encode",
67+
"list_to_shell_command",
68+
"metadata",
69+
"to_str",
70+
]

tests/repositories/link_sources/test_base.py

+17-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from collections import defaultdict
34
from typing import TYPE_CHECKING
45
from unittest.mock import PropertyMock
56

@@ -24,16 +25,22 @@ def link_source(mocker: MockerFixture) -> LinkSource:
2425
url = "https://example.org"
2526
link_source = LinkSource(url)
2627
mocker.patch(
27-
f"{LinkSource.__module__}.{LinkSource.__qualname__}.links",
28+
f"{LinkSource.__module__}.{LinkSource.__qualname__}._link_cache",
2829
new_callable=PropertyMock,
29-
return_value=iter(
30-
[
31-
Link(f"{url}/demo-0.1.0.tar.gz"),
32-
Link(f"{url}/demo-0.1.0_invalid.tar.gz"),
33-
Link(f"{url}/invalid.tar.gz"),
34-
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
35-
Link(f"{url}/demo-0.1.1.tar.gz"),
36-
]
30+
return_value=defaultdict(
31+
lambda: defaultdict(list),
32+
{
33+
canonicalize_name("demo"): defaultdict(
34+
list,
35+
{
36+
Version.parse("0.1.0"): [
37+
Link(f"{url}/demo-0.1.0.tar.gz"),
38+
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
39+
],
40+
Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")],
41+
},
42+
),
43+
},
3744
),
3845
)
3946
return link_source
@@ -63,7 +70,7 @@ def test_link_package_data(filename: str, expected: Package | None) -> None:
6370
],
6471
)
6572
def test_versions(name: str, expected: set[Version], link_source: LinkSource) -> None:
66-
assert set(link_source.versions(name)) == expected
73+
assert set(link_source.versions(canonicalize_name(name))) == expected
6774

6875

6976
def test_packages(link_source: LinkSource) -> None:

tests/repositories/link_sources/test_html.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import pytest
44

5+
from packaging.utils import canonicalize_name
56
from poetry.core.packages.utils.link import Link
67
from poetry.core.semver.version import Version
78

@@ -90,4 +91,4 @@ def test_yanked(yanked_attrs: tuple[str, str], expected: bool | str) -> None:
9091
content = DEMO_TEMPLATE.format(anchors)
9192
page = HTMLPage("https://example.org", content)
9293

93-
assert page.yanked("demo", Version.parse("0.1")) == expected
94+
assert page.yanked(canonicalize_name("demo"), Version.parse("0.1")) == expected

tests/repositories/test_legacy_repository.py

+2-15
Original file line numberDiff line numberDiff line change
@@ -102,25 +102,12 @@ def test_page_invalid_version_link() -> None:
102102
assert page is not None
103103

104104
links = list(page.links)
105-
assert len(links) == 2
105+
assert len(links) == 1
106106

107-
versions = list(page.versions("poetry"))
107+
versions = list(page.versions(canonicalize_name("poetry")))
108108
assert len(versions) == 1
109109
assert versions[0].to_string() == "0.1.0"
110110

111-
invalid_link = None
112-
113-
for link in links:
114-
if link.filename.startswith("poetry-21"):
115-
invalid_link = link
116-
break
117-
118-
links_010 = list(page.links_for_version(canonicalize_name("poetry"), versions[0]))
119-
assert invalid_link not in links_010
120-
121-
assert invalid_link
122-
assert not page.link_package_data(invalid_link)
123-
124111
packages = list(page.packages)
125112
assert len(packages) == 1
126113
assert packages[0].name == "poetry"

0 commit comments

Comments
 (0)