Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce pex3 cache {dir,info,purge}. #2513

Merged
merged 8 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pex/atomic_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def acquire(self):
lock_api(lock_fd, fcntl.LOCK_EX) # A blocking write lock.

def release():
# type: () -> None
try:
lock_api(lock_fd, fcntl.LOCK_UN)
finally:
Expand Down
19 changes: 15 additions & 4 deletions pex/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
import sys
import types

from pex.typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Any, List


class Bootstrap(object):
"""Supports introspection of the PEX bootstrap code."""
Expand Down Expand Up @@ -45,12 +50,14 @@ def path(self):
return self._sys_path_entry

def demote(self, disable_vendor_importer=True):
# type: (bool) -> List[types.ModuleType]
"""Demote the bootstrap code to the end of the `sys.path` so it is found last.

:return: The list of un-imported bootstrap modules.
:rtype: list of :class:`types.ModuleType`
"""
import sys # Grab a hold of `sys` early since we'll be un-importing our module in this process.
# Grab a hold of `sys` early since we'll be un-importing our module in this process.
import sys

# N.B.: We mutate the sys.path before un-importing modules so that any re-imports triggered
# by concurrent code will pull from the desired sys.path ordering.
Expand All @@ -59,20 +66,23 @@ def demote(self, disable_vendor_importer=True):
sys.path[:] = [path for path in sys.path if os.path.realpath(path) != self._realpath]
sys.path.append(self._sys_path_entry)

unimported_modules = []
unimported_modules = [] # type: List[types.ModuleType]
for name, module in reversed(sorted(sys.modules.items())):
if "pex.cache.access" == name:
# N.B.: The pex.cache.access module maintains cache lock state which must be
# preserved in the case of a Pex PEX.
module.save_lock_state()
if "pex.third_party" == name and not disable_vendor_importer:
continue
if self.imported_from_bootstrap(module):
unimported_modules.append(sys.modules.pop(name))
return unimported_modules

def imported_from_bootstrap(self, module):
# type: (Any) -> bool
"""Return ``True`` if the given ``module`` object was imported from bootstrap code.

:param module: The module to check the provenance of.
:type module: :class:`types.ModuleType`
:rtype: bool
"""

# Python 2.7 does some funky imports in the email stdlib package that cause havoc with
Expand All @@ -96,6 +106,7 @@ def imported_from_bootstrap(self, module):
return False

def __repr__(self):
# type: () -> str
return "{cls}(sys_path_entry={sys_path_entry!r})".format(
cls=type(self).__name__, sys_path_entry=self._sys_path_entry
)
Expand Down
2 changes: 2 additions & 0 deletions pex/cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).
101 changes: 101 additions & 0 deletions pex/cache/access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import, print_function

import fcntl
import os
from contextlib import contextmanager

from pex.common import safe_mkdir
from pex.typing import TYPE_CHECKING
from pex.variables import ENV

if TYPE_CHECKING:
from typing import Iterator, Optional, Tuple


# N.B.: The lock file path is last in the lock state tuple to allow for a simple encoding scheme in
# `save_lock_state` that is impervious to a delimiter collision in the lock file path when decoding
# in `_maybe_restore_lock_state` (due to maxsplit).

# The cache access lock currently tracked by this module as an `(exclusive, lock_fd, lock_file)`
# tuple, or `None` when no lock has been taken (or restored from the environment) yet.
_LOCK = None # type: Optional[Tuple[bool, int, str]]

# The environment variable `save_lock_state` writes the "|"-delimited lock state to and that
# `_maybe_restore_lock_state` pops it back out of.
_PEX_CACHE_ACCESS_LOCK_ENV_VAR = "_PEX_CACHE_ACCESS_LOCK"


def save_lock_state():
    # type: () -> None
    """Export any held lock state to the environment so it survives un-importing this module.

    The state is encoded as `<exclusive as 0 or 1>|<lock fd>|<lock file path>`. When no lock is
    currently tracked, nothing is written.
    """

    # N.B.: This supports the sole case of a Pex PEX, whose runtime obtains a lock that it must hand
    # off to the Pex CLI it spawns.

    if _LOCK is None:
        return

    exclusive, lock_fd, lock_file = _LOCK
    # The lock file path goes last so a "|" embedded in the path cannot confuse decoding.
    fields = (str(int(exclusive)), str(lock_fd), lock_file)
    os.environ[_PEX_CACHE_ACCESS_LOCK_ENV_VAR] = "|".join(fields)


def _maybe_restore_lock_state():
    # type: () -> None
    """Re-hydrate `_LOCK` from the environment if `save_lock_state` left lock state there.

    The environment variable is removed as part of reading it. If it is unset or empty, `_LOCK`
    is left untouched.
    """
    global _LOCK

    encoded_state = os.environ.pop(_PEX_CACHE_ACCESS_LOCK_ENV_VAR, None)
    if not encoded_state:
        return

    # Limiting to 2 splits keeps any "|" in the trailing lock file path intact.
    exclusive_field, lock_fd_field, lock_file = encoded_state.split("|", 2)
    _LOCK = bool(int(exclusive_field)), int(lock_fd_field), lock_file


def _lock(exclusive):
    # type: (bool) -> str
    """Take the Pex cache access lock in the requested mode and return the lock file path.

    If a lock is already tracked (or can be restored from the environment) in the requested mode,
    its lock file path is returned immediately. Otherwise the lock is taken via `fcntl.flock`,
    re-using the file descriptor of any already tracked lock, and the new state is recorded in
    `_LOCK`.

    :param exclusive: `True` to take an exclusive lock; `False` to take a shared lock.
    :return: The path of the lock file.
    """
    global _LOCK

    if _LOCK is None:
        _maybe_restore_lock_state()

    held_fd = None  # type: Optional[int]
    if _LOCK is not None:
        held_exclusive, held_fd, held_lock_file = _LOCK
        if held_exclusive == exclusive:
            return held_lock_file

    lock_file = os.path.join(ENV.PEX_ROOT, "access.lck")

    if held_fd is not None:
        # Switching modes: flock the descriptor we already have open.
        lock_fd = held_fd
    else:
        # N.B.: We don't actually write anything to the lock file but the fcntl file locking
        # operations only work on files opened for at least write.
        safe_mkdir(os.path.dirname(lock_file))
        lock_fd = os.open(lock_file, os.O_CREAT | os.O_WRONLY)

    # N.B.: Since flock operates on an open file descriptor and these are
    # guaranteed to be closed by the operating system when the owning process exits,
    # this lock is immune to staleness.
    operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
    fcntl.flock(lock_fd, operation)

    _LOCK = exclusive, lock_fd, lock_file
    return lock_file


def read_write():
    # type: () -> str
    """Obtain the shared Pex cache read-write lock and return the lock file path.

    The lock is requested in non-exclusive mode. This function blocks until it is safe to use the
    Pex cache.
    """
    lock_file = _lock(exclusive=False)
    return lock_file


@contextmanager
def await_delete_lock():
    # type: () -> Iterator[str]
    """Await the Pex cache delete lock, yielding the lock file path.

    The shared lock is taken on entry and the lock file path is yielded to the body. Once the
    context manager exits, the delete (exclusive) lock is held, and it is safe to delete all or
    portions of the Pex cache.
    """
    shared_lock_file = _lock(exclusive=False)
    yield shared_lock_file
    # N.B.: There is deliberately no try / finally here: the exclusive lock is only requested
    # when the body of the with-block completes without raising.
    _lock(exclusive=True)
181 changes: 181 additions & 0 deletions pex/cache/dirs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import

import os

from pex.enum import Enum
from pex.typing import TYPE_CHECKING
from pex.variables import ENV, Variables

if TYPE_CHECKING:
from typing import Iterable, Iterator, Union


class CacheDir(Enum["CacheDir.Value"]):
    """The set of versioned directories found under the Pex root, with descriptive metadata."""

    class Value(Enum.Value):
        """One cache directory: its on-disk name, display name, version and dependencies."""

        def __init__(
            self,
            value,  # type: str
            name,  # type: str
            version,  # type: int
            description,  # type: str
            dependencies=(),  # type: Iterable[CacheDir.Value]
        ):
            """
            :param value: The directory name; the first component of `rel_path`.
            :param name: A display name for the cache directory.
            :param version: The cache directory version; the second component of `rel_path`.
            :param description: A description of what the cache directory holds.
            :param dependencies: The other cache directories this one depends on.
            """
            Enum.Value.__init__(self, value)
            self.name = name
            self.version = version
            self.description = description
            # Frozen to a tuple so membership checks can be repeated against any input iterable.
            self.dependencies = tuple(dependencies)

        @property
        def rel_path(self):
            # type: () -> str
            """The `<value>/<version>` path of this cache directory relative to the Pex root."""
            components = (self.value, str(self.version))
            return os.path.join(*components)

        def path(
            self,
            *subdirs,  # type: str
            **kwargs  # type: Union[str, Variables]
        ):
            # type: (...) -> str
            """Return the path of this cache directory, or of a sub-path within it.

            :param subdirs: Optional path components to append beneath this cache directory.
            :param kwargs: May carry a `pex_root` that is either a path string or a `Variables`
                           instance to read `PEX_ROOT` from; defaults to `ENV`.
            """
            pex_root = kwargs.get("pex_root", ENV)
            if isinstance(pex_root, Variables):
                base_dir = pex_root.PEX_ROOT
            else:
                base_dir = pex_root
            return os.path.join(base_dir, self.rel_path, *subdirs)

        def iter_transitive_dependents(self):
            # type: () -> Iterator[CacheDir.Value]
            """Yield each cache directory that lists this one as a dependency, and their dependents.

            Each direct dependent is yielded first, immediately followed by its own transitive
            dependents.
            """
            for candidate in CacheDir.values():
                if self not in candidate.dependencies:
                    continue
                yield candidate
                for transitive_dependent in candidate.iter_transitive_dependents():
                    yield transitive_dependent

    BOOTSTRAP_ZIPS = Value(
        "bootstrap_zips",
        name="Packed Bootstraps",
        version=0,
        description="PEX runtime bootstrap code, zipped up for `--layout packed` PEXes.",
    )

    BOOTSTRAPS = Value(
        "bootstraps",
        name="Bootstraps",
        version=0,
        description="PEX runtime bootstrap code.",
    )

    BUILT_WHEELS = Value(
        "built_wheels",
        name="Built Wheels",
        version=0,
        description="Wheels built by Pex from resolved sdists when creating PEX files.",
    )

    DOCS = Value(
        "docs",
        name="Pex Docs",
        version=0,
        description="Artifacts used in serving Pex docs via `pex --docs` and `pex3 docs`.",
    )

    DOWNLOADS = Value(
        "downloads",
        name="Lock Artifact Downloads",
        version=0,
        description="Distributions downloaded when resolving from a Pex lock file.",
    )

    INSTALLED_WHEELS = Value(
        "installed_wheels",
        name="Pre-installed Wheels",
        version=0,
        description=(
            "Pre-installed wheel chroots used to both build PEXes and serve as runtime "
            "`sys.path` entries."
        ),
    )

    INTERPRETERS = Value(
        "interpreters",
        name="Interpreters",
        version=0,
        description="Information about interpreters found on the system.",
    )

    ISOLATED = Value(
        "isolated",
        name="Isolated Pex Code",
        version=0,
        description="The Pex codebase isolated for internal use in subprocesses.",
    )

    PACKED_WHEELS = Value(
        "packed_wheels",
        name="Packed Wheels",
        version=0,
        description=(
            "The same content as {installed_wheels!r}, but zipped up for "
            "`--layout packed` PEXes.".format(installed_wheels=INSTALLED_WHEELS.rel_path)
        ),
    )

    PIP = Value(
        "pip",
        name="Pip Versions",
        version=0,
        description="Isolated Pip caches and Pip PEXes Pex uses to resolve distributions.",
    )

    PLATFORMS = Value(
        "platforms",
        name="Abbreviated Platforms",
        version=0,
        description=(
            "Information calculated about abbreviated platforms specified via `--platform`."
        ),
    )

    SCIES = Value(
        "scies",
        name="Scie Tools",
        version=0,
        description="Tools and caches used when building PEX scies via `--scie {eager,lazy}`.",
    )

    TOOLS = Value(
        "tools",
        name="Pex Tools",
        version=0,
        description="Caches for the various `PEX_TOOLS=1` / `pex-tools` subcommands.",
    )

    USER_CODE = Value(
        "user_code",
        name="User Code",
        version=0,
        description=(
            "User code added to PEX files using `-D` / `--sources-directory`, "
            "`-P` / `--package` and `-M` / `--module`."
        ),
    )

    UNZIPPED_PEXES = Value(
        "unzipped_pexes",
        name="Unzipped PEXes",
        version=0,
        description="The unzipped PEX files executed on this machine.",
        dependencies=(BOOTSTRAPS, USER_CODE, INSTALLED_WHEELS),
    )

    VENVS = Value(
        "venvs",
        name="Virtual Environments",
        version=0,
        description="Virtual environments generated at runtime for `--venv` mode PEXes.",
        dependencies=(INSTALLED_WHEELS,),
    )
16 changes: 5 additions & 11 deletions pex/cache.py → pex/cache/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from pex.typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Iterable

import appdirs # vendor:skip
else:
from pex.third_party import appdirs
Expand All @@ -20,13 +18,9 @@
_CACHE_DIR = appdirs.user_cache_dir(appauthor="pex-tool.org", appname="pex") # type: str


def cache_path(
sub_path=(), # type: Iterable[str]
expand_user=True, # type: bool
):
# type: (...) -> str
def path(expand_user=True):
# type: (bool) -> str

path = os.path.join(_CACHE_DIR, *sub_path)
if expand_user or _USER_DIR != commonpath((_USER_DIR, path)):
return path
return os.path.join("~", os.path.relpath(path, _USER_DIR))
if expand_user or _USER_DIR != commonpath((_USER_DIR, _CACHE_DIR)):
return _CACHE_DIR
return os.path.join("~", os.path.relpath(_CACHE_DIR, _USER_DIR))
3 changes: 2 additions & 1 deletion pex/cli/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pex.cli.command import BuildTimeCommand
from pex.cli.commands.cache.command import Cache
from pex.cli.commands.docs import Docs
from pex.cli.commands.interpreter import Interpreter
from pex.cli.commands.lock import Lock
Expand All @@ -14,4 +15,4 @@

def all_commands():
# type: () -> Iterable[Type[BuildTimeCommand]]
return Docs, Interpreter, Lock, Venv
return Cache, Docs, Interpreter, Lock, Venv
Empty file.
Loading