Skip to content

Commit

Permalink
feature: cache downloaded wheel information (#2276)
Browse files Browse the repository at this point in the history
Co-authored-by: Bernát Gábor <[email protected]>
  • Loading branch information
mayeut and gaborbernat authored Jan 2, 2022
1 parent 5f65057 commit 319a540
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 74 deletions.
2 changes: 2 additions & 0 deletions docs/changelog/2268.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add downloaded wheel information in the relevant JSON embed file to
prevent additional downloads of the same wheel. - by :user:`mayeut`.
4 changes: 2 additions & 2 deletions src/virtualenv/app_data/via_disk_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
│ │ └── <install class> -> CopyPipInstall / SymlinkPipInstall
│ │ └── <wheel name> -> pip-20.1.1-py2.py3-none-any
│ └── embed
│ └── 2 -> json format versioning
│ └── 3 -> json format versioning
│ └── *.json -> for every distribution contains data about newer embed versions and releases
└─── unzip <in zip app we cannot refer to some internal files, so first extract them>
└── <virtualenv version>
Expand Down Expand Up @@ -101,7 +101,7 @@ def py_info_clear(self):
filename.unlink()

def embed_update_log(self, distribution, for_py_version):
return EmbedDistributionUpdateStoreDisk(self.lock / "wheel" / for_py_version / "embed" / "2", distribution)
return EmbedDistributionUpdateStoreDisk(self.lock / "wheel" / for_py_version / "embed" / "3", distribution)

@property
def house(self):
Expand Down
3 changes: 3 additions & 0 deletions src/virtualenv/seed/wheels/acquire.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from virtualenv.util.subprocess import Popen, subprocess

from .bundle import from_bundle
from .periodic_update import add_wheel_to_update_log
from .util import Version, Wheel, discover_wheels


Expand All @@ -35,6 +36,8 @@ def get_wheel(distribution, version, for_py_version, search_dirs, download, app_
to_folder=app_data.house,
env=env,
)
if wheel is not None and app_data.can_update:
add_wheel_to_update_log(wheel, for_py_version, app_data)

return wheel

Expand Down
43 changes: 32 additions & 11 deletions src/virtualenv/seed/wheels/periodic_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,19 @@ def handle_auto_update(distribution, for_py_version, wheel, search_dirs, app_dat
trigger_update(distribution, for_py_version, wheel, search_dirs, app_data, periodic=True, env=env)


def add_wheel_to_update_log(wheel, for_py_version, app_data):
    """Register ``wheel`` in the embed update log of its distribution.

    The log for ``wheel.distribution`` / ``for_py_version`` is loaded through
    ``app_data.embed_update_log``. If an entry whose ``filename`` equals
    ``wheel.name`` is already recorded, a warning is logged and nothing is
    written. Otherwise a new entry with source ``"download"`` is appended and
    the log is written back.

    :param wheel: the wheel to record (its ``distribution`` and ``name`` are used)
    :param for_py_version: python version selecting which update log to use
    :param app_data: application data object providing ``embed_update_log``
    :return: ``None``
    """
    store = app_data.embed_update_log(wheel.distribution, for_py_version)
    logging.debug("adding %s information to %s", wheel.name, store.file)
    update_log = UpdateLog.from_dict(store.read())

    # bail out as soon as we find the wheel among the already known versions
    for known in update_log.versions:
        if known.filename == wheel.name:
            logging.warning("%s already present in %s", wheel.name, store.file)
            return

    # release date is left as None: it is only needed for the "periodic" source
    entry = NewVersion(wheel.name, datetime.now(), None, "download")
    # the new entry must go last in the list for updates to work properly
    update_log.versions.append(entry)
    store.write(update_log.to_dict())


DATETIME_FMT = "%Y-%m-%dT%H:%M:%S.%fZ"


Expand Down Expand Up @@ -248,23 +261,27 @@ def _run_do_update(app_data, distribution, embed_filename, for_py_version, perio
embed_update_log = app_data.embed_update_log(distribution, for_py_version)
u_log = UpdateLog.from_dict(embed_update_log.read())
now = datetime.now()

update_versions, other_versions = [], []
for version in u_log.versions:
if version.source in {"periodic", "manual"}:
update_versions.append(version)
else:
other_versions.append(version)

if periodic:
source = "periodic"
# mark everything not updated manually as source "periodic"
for version in u_log.versions:
if version.source != "manual":
version.source = source
else:
source = "manual"
# mark everything as source "manual"
for version in u_log.versions:
version.source = source
# mark the most recent one as source "manual"
if update_versions:
update_versions[0].source = source

if wheel_filename is not None:
dest = wheelhouse / wheel_filename.name
if not dest.exists():
copy2(str(wheel_filename), str(wheelhouse))
last, last_version, versions = None, None, []
last, last_version, versions, filenames = None, None, [], set()
while last is None or not last.use(now, ignore_grace_period_ci=True):
download_time = datetime.now()
dest = acquire.download_wheel(
Expand All @@ -276,21 +293,24 @@ def _run_do_update(app_data, distribution, embed_filename, for_py_version, perio
to_folder=wheelhouse,
env=os.environ,
)
if dest is None or (u_log.versions and u_log.versions[0].filename == dest.name):
if dest is None or (update_versions and update_versions[0].filename == dest.name):
break
release_date = release_date_for_wheel_path(dest.path)
last = NewVersion(filename=dest.path.name, release_date=release_date, found_date=download_time, source=source)
logging.info("detected %s in %s", last, datetime.now() - download_time)
versions.append(last)
last_wheel = Wheel(Path(last.filename))
filenames.add(last.filename)
last_wheel = last.wheel
last_version = last_wheel.version
if embed_version is not None:
if embed_version >= last_wheel.version_tuple: # stop download if we reach the embed version
break
u_log.periodic = periodic
if not u_log.periodic:
u_log.started = now
u_log.versions = versions + u_log.versions
# update other_versions by removing version we just found
other_versions = [version for version in other_versions if version.filename not in filenames]
u_log.versions = versions + update_versions + other_versions
u_log.completed = datetime.now()
embed_update_log.write(u_log.to_dict())
return versions
Expand Down Expand Up @@ -395,6 +415,7 @@ def _run_manual_upgrade(app_data, distribution, for_py_version, env):


__all__ = (
"add_wheel_to_update_log",
"periodic_update",
"do_update",
"manual_upgrade",
Expand Down
59 changes: 54 additions & 5 deletions tests/unit/seed/wheels/test_acquire.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,25 @@

import os
import sys
from datetime import datetime
from subprocess import CalledProcessError

import pytest

from virtualenv.app_data import AppDataDiskFolder
from virtualenv.info import IS_PYPY, PY2
from virtualenv.seed.wheels.acquire import download_wheel, get_wheel, pip_wheel_env_run
from virtualenv.seed.wheels.embed import BUNDLE_FOLDER, get_embed_wheel
from virtualenv.seed.wheels.periodic_update import dump_datetime
from virtualenv.seed.wheels.util import Wheel, discover_wheels
from virtualenv.util.path import Path


@pytest.fixture(autouse=True)
def fake_release_date(mocker):
    """Autouse fixture replacing the release-date lookup with a stub returning ``None``."""
    target = "virtualenv.seed.wheels.periodic_update.release_date_for_wheel_path"
    mocker.patch(target, return_value=None)


def test_pip_wheel_env_run_could_not_find(session_app_data, mocker):
mocker.patch("virtualenv.seed.wheels.acquire.from_bundle", return_value=None)
with pytest.raises(RuntimeError, match="could not find the embedded pip"):
Expand Down Expand Up @@ -74,24 +83,64 @@ def test_download_fails(mocker, for_py_version, session_app_data):
@pytest.fixture
def downloaded_wheel(mocker):
wheel = Wheel.from_path(Path("setuptools-0.0.0-py2.py3-none-any.whl"))
mocker.patch("virtualenv.seed.wheels.acquire.download_wheel", return_value=wheel)
yield wheel
yield wheel, mocker.patch("virtualenv.seed.wheels.acquire.download_wheel", return_value=wheel)


@pytest.mark.parametrize("version", ["bundle", "0.0.0"])
def test_get_wheel_download_called(for_py_version, session_app_data, downloaded_wheel, version):
def test_get_wheel_download_called(mocker, for_py_version, session_app_data, downloaded_wheel, version):
distribution = "setuptools"
write = mocker.patch("virtualenv.app_data.via_disk_folder.JSONStoreDisk.write")
wheel = get_wheel(distribution, version, for_py_version, [], True, session_app_data, False, os.environ)
assert wheel is not None
assert wheel.name == downloaded_wheel.name
assert wheel.name == downloaded_wheel[0].name
assert downloaded_wheel[1].call_count == 1
assert write.call_count == 1


@pytest.mark.parametrize("version", ["embed", "pinned"])
def test_get_wheel_download_not_called(for_py_version, session_app_data, downloaded_wheel, version):
def test_get_wheel_download_not_called(mocker, for_py_version, session_app_data, downloaded_wheel, version):
distribution = "setuptools"
expected = get_embed_wheel(distribution, for_py_version)
if version == "pinned":
version = expected.version
write = mocker.patch("virtualenv.app_data.via_disk_folder.JSONStoreDisk.write")
wheel = get_wheel(distribution, version, for_py_version, [], True, session_app_data, False, os.environ)
assert wheel is not None
assert wheel.name == expected.name
assert downloaded_wheel[1].call_count == 0
assert write.call_count == 0


@pytest.mark.skipif(IS_PYPY and PY2, reason="mocker.spy failing on PyPy 2.x")
def test_get_wheel_download_cached(tmp_path, freezer, mocker, for_py_version, downloaded_wheel):
    """Requesting the same wheel twice downloads it once and writes the update log once.

    Uses a fresh ``AppDataDiskFolder`` under ``tmp_path`` and spies on
    ``JSONStoreDisk.write`` to inspect the payload that was written.
    """
    from virtualenv.app_data.via_disk_folder import JSONStoreDisk

    expected = downloaded_wheel[0]
    download = downloaded_wheel[1]
    app_data = AppDataDiskFolder(folder=str(tmp_path))
    write = mocker.spy(JSONStoreDisk, "write")

    def request_and_check():
        wheel = get_wheel(
            expected.distribution,
            expected.version,
            for_py_version,
            [],
            True,
            app_data,
            False,
            os.environ,
        )
        assert wheel is not None
        assert wheel.name == expected.name
        # totals stay at one download and one log write however often we ask
        assert download.call_count == 1
        assert write.call_count == 1

    request_and_check()  # 1st call, not cached, download is called
    request_and_check()  # 2nd call, cached, download is not called

    # positional arguments of the last spied write; index 1 is the written payload
    # (index 0 is presumably the store instance, since the spy is on the class)
    logged = write.call_args[0][1]
    entry = {
        "filename": expected.name,
        "release_date": None,
        "found_date": dump_datetime(datetime.now()),
        "source": "download",
    }
    assert logged == {
        "completed": None,
        "periodic": None,
        "started": None,
        "versions": [entry],
    }
Loading

0 comments on commit 319a540

Please sign in to comment.