Skip to content

Commit

Permalink
Lower noop wheel install overhead. (#2315)
Browse files Browse the repository at this point in the history
Previously, installing a wheel that was already installed incurred the
cost of hashing the installed wheel chroot every time. The overhead of
this wasted work for a warm cache was egregious for large distributions
like PyTorch, with gigabytes of files to hash taking seconds.

Work towards #2312.
  • Loading branch information
jsirois authored Jan 5, 2024
1 parent 4eb5c9a commit 1be2143
Show file tree
Hide file tree
Showing 10 changed files with 42 additions and 12 deletions.
4 changes: 2 additions & 2 deletions pex/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ def iter_map_parallel(
# least two slots to ensure we process input items in parallel.
pool_size = max(2, min(len(input_items) // min_average_load, _sanitize_max_jobs(max_jobs)))

perform_install = functools.partial(_apply_function, function)
apply_function = functools.partial(_apply_function, function)

slots = defaultdict(list) # type: DefaultDict[int, List[float]]
with TRACER.timed(
Expand All @@ -741,7 +741,7 @@ def iter_map_parallel(
)
):
with _mp_pool(size=pool_size) as pool:
for pid, result, elapsed_secs in pool.imap_unordered(perform_install, input_items):
for pid, result, elapsed_secs in pool.imap_unordered(apply_function, input_items):
TRACER.log(
"[{pid}] {verbed} {result} in {elapsed_secs:.2f}s".format(
pid=pid,
Expand Down
24 changes: 22 additions & 2 deletions pex/pep_376.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pex.common import is_pyc_dir, is_pyc_file, safe_mkdir, safe_open
from pex.interpreter import PythonInterpreter
from pex.typing import TYPE_CHECKING, cast
from pex.util import CacheHelper
from pex.venv.virtualenv import Virtualenv

if TYPE_CHECKING:
Expand Down Expand Up @@ -169,10 +170,24 @@ def save(
record_relpath, # type: Text
):
# type: (...) -> InstalledWheel
layout = {"stash_dir": stash_dir, "record_relpath": record_relpath}

# We currently need the installed wheel chroot hash for PEX-INFO / boot purposes. It is
# expensive to calculate; so we do it here 1 time when saving the installed wheel.
fingerprint = CacheHelper.dir_hash(prefix_dir, hasher=hashlib.sha256)

layout = {
"stash_dir": stash_dir,
"record_relpath": record_relpath,
"fingerprint": fingerprint,
}
with open(cls.layout_file(prefix_dir), "w") as fp:
json.dump(layout, fp, sort_keys=True)
return cls(prefix_dir=prefix_dir, stash_dir=stash_dir, record_relpath=record_relpath)
return cls(
prefix_dir=prefix_dir,
stash_dir=stash_dir,
record_relpath=record_relpath,
fingerprint=fingerprint,
)

@classmethod
def load(cls, prefix_dir):
Expand Down Expand Up @@ -201,15 +216,20 @@ def load(cls, prefix_dir):
layout_file=layout_file, value=layout
)
)

fingerprint = layout.get("fingerprint")

return cls(
prefix_dir=prefix_dir,
stash_dir=cast(str, stash_dir),
record_relpath=cast(str, record_relpath),
fingerprint=cast("Optional[str]", fingerprint),
)

prefix_dir = attr.ib() # type: str
stash_dir = attr.ib() # type: str
record_relpath = attr.ib() # type: Text
fingerprint = attr.ib() # type: Optional[str]

def stashed_path(self, *components):
# type: (*str) -> str
Expand Down
12 changes: 11 additions & 1 deletion pex/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pex.jobs import Raise, SpawnedJob, execute_parallel, iter_map_parallel
from pex.network_configuration import NetworkConfiguration
from pex.orderedset import OrderedSet
from pex.pep_376 import InstalledWheel, LoadError
from pex.pep_425 import CompatibilityTags
from pex.pep_427 import InstallableType, WheelError, install_wheel_chroot
from pex.pep_503 import ProjectName
Expand Down Expand Up @@ -485,7 +486,16 @@ def finalize_install(self, install_requests):
# pex: * - paths that do not exist or will be imported via zipimport
# pex.pex 2.0.2
#
wheel_dir_hash = fingerprint_path(self.install_chroot)
cached_fingerprint = None # type: Optional[str]
try:
installed_wheel = InstalledWheel.load(self.install_chroot)
except LoadError:
# We support legacy chroots below by calculating the chroot fingerprint just in time.
pass
else:
cached_fingerprint = installed_wheel.fingerprint

wheel_dir_hash = cached_fingerprint or fingerprint_path(self.install_chroot)
runtime_key_dir = os.path.join(self._installation_root, wheel_dir_hash)
with atomic_directory(runtime_key_dir) as atomic_dir:
if not atomic_dir.is_finalized():
Expand Down
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/attrs/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "attrs-21.5.0.dev0.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "13e0015aa58e8f470b17936f42a5d5f874a20194156c371d2a4c695dc8c16d9e", "record_relpath": "attrs-21.5.0.dev0.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/packaging_20_9/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "packaging-20.9.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "5a3fc5dcd563b4a4474944cd0d73ee4a51fb53132af553abfb203dc1cdf8f7c3", "record_relpath": "packaging-20.9.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/packaging_21_3/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "packaging-21.3.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "2a93da49a5fc8217a0f710ff0ca3cfc56fefa35eddcf6a64786d452bfa284525", "record_relpath": "packaging-21.3.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/packaging_23_1/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "packaging-23.1.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "a98c4a74d7b6a62763df7ad330ac4c7a0779323fc36e961aeb8f20865e21a191", "record_relpath": "packaging-23.1.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/pip/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "pip-20.3.4.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "120267325b80f5c4b4adac019eb6617ab3319395c043d2871eedf70dd6ae2954", "record_relpath": "pip-20.3.4.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/setuptools/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "setuptools-44.0.0+3acb925dd708430aeaf197ea53ac8a752f7c1863.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "ebe3717ba6bad87ca328cbf3d3eb3f5475105bccb51dc09a69d37eff6b2e5210", "record_relpath": "setuptools-44.0.0+3acb925dd708430aeaf197ea53ac8a752f7c1863.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/toml/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "toml-0.10.2.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "3d44cdc5911c31b190a0225202daafbb22f7f74e6761fb086742dadb5dff5384", "record_relpath": "toml-0.10.2.dist-info/RECORD", "stash_dir": ".prefix"}

0 comments on commit 1be2143

Please sign in to comment.