Set --no-pre-install-wheels and PEX_MAX_INSTALL_JOBS for faster builds of internal pexes #20670

Merged: 3 commits, Apr 15, 2024
10 changes: 8 additions & 2 deletions src/python/pants/backend/python/goals/publish_test.py
@@ -136,7 +136,13 @@ def test_twine_upload(rule_runner, packages) -> None:
"my-package-0.1.0.tar.gz",
"my_package-0.1.0-py3-none-any.whl",
),
env=FrozenDict({"TWINE_USERNAME": "whoareyou", "TWINE_PASSWORD": "secret"}),
env=FrozenDict(
{
"TWINE_USERNAME": "whoareyou",
"TWINE_PASSWORD": "secret",
"PEX_MAX_INSTALL_JOBS": "0",
}
),
),
)
assert_package(
@@ -156,7 +162,7 @@ def test_twine_upload(rule_runner, packages) -> None:
"my-package-0.1.0.tar.gz",
"my_package-0.1.0-py3-none-any.whl",
),
env=FrozenDict({"TWINE_USERNAME": "whoami"}),
env=FrozenDict({"TWINE_USERNAME": "whoami", "PEX_MAX_INSTALL_JOBS": "0"}),
),
)

21 changes: 18 additions & 3 deletions src/python/pants/backend/python/util_rules/pex.py
@@ -705,6 +705,12 @@ async def build_pex(
if request.pex_path:
argv.extend(["--pex-path", ":".join(pex.name for pex in request.pex_path)])

if request.internal_only:
# An internal-only PEX runs on a single machine, and pre-installing wheels is wasted work in
# that case (see https://github.com/pex-tool/pex/issues/2292#issuecomment-1854582647 for
# analysis).
argv.append("--no-pre-install-wheels")

argv.append(f"--sources-directory={source_dir_name}")
sources_digest_as_subdir = await Get(
Digest, AddPrefix(request.sources or EMPTY_DIGEST, source_dir_name)
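
As a rough illustration (not part of this diff): the two halves of this change pair up outside of Pants as well. --no-pre-install-wheels makes the pex tool ship raw .whl files and defer installation to first boot, and PEX_MAX_INSTALL_JOBS then controls how parallel that deferred install is. A minimal sketch of that pairing with the pex CLI directly, assuming a pex binary of at least the 2.3.0 this PR pins is on PATH; the requirement (six) and output name are purely illustrative, and "0" is, on my reading of the pex docs, the "let pex pick the parallelism" setting:

import os
import subprocess

# Build a PEX that keeps raw .whl files inside it instead of pre-installed
# wheel chroots, deferring installation to first boot.
subprocess.run(
    ["pex", "six", "--no-pre-install-wheels", "-o", "six.pex"],
    check=True,
)

# On first boot, allow pex to parallelise the deferred wheel installs.
# (Assumption: "0" asks pex to choose the level of parallelism itself.)
subprocess.run(
    ["./six.pex", "-c", "import six; print(six.__version__)"],
    env={**os.environ, "PEX_MAX_INSTALL_JOBS": "0"},
    check=True,
)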
@@ -1157,6 +1163,9 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment
complete_pex_env = pex_environment.in_sandbox(working_directory=request.working_directory)
argv = complete_pex_env.create_argv(pex.name, *request.argv)
env = {
# Set this in case this PEX was built with --no-pre-install-wheels, and thus parallelising
# the install on cold boot is handy.
"PEX_MAX_INSTALL_JOBS": str(request.concurrency_available),
Contributor:
I think this is what you were getting at in your notes, but as a rough estimate if I have 16 cores this will potentially thrash into 16 different cache entries, right?

Contributor Author:
Yeah, in theory... depending on what else is running at the same time.

However, on closer examination prompted by your comment: I think I did this wrong:

  • concurrency_available is configuration that tells the command runner how much concurrency would be useful for the process (e.g. "I'm processing 34 files, and could do so in parallel" => concurrency available = 34), not necessarily how much it is actually allocated.
  • That allocation is communicated to the process by replacing the {pants_concurrency} template in its argv (to be used like ["some-process", ..., "--jobs={pants_concurrency}"] or similar).
  • This replacement currently only seems to work on the argv, not env vars.
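
As a rough sketch (my illustration, not the author's code) of that request/allocation split from the plugin-API side, assuming the standard pants.engine.process.Process constructor; the tool name and arguments are made up:

from pants.engine.process import Process

process = Process(
    # The {pants_concurrency} placeholder is what the command runner rewrites
    # with the concurrency it actually granted (see the templating code quoted
    # further down).
    argv=["some-tool", "--jobs={pants_concurrency}", "a.txt", "b.txt"],
    description="run some-tool over 2 files",
    # A request for up to 2 slots, not a guarantee of 2.
    concurrency_available=2,
)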

For my own reference, the relevant code (and its surroundings) where the concurrency_available request flows through to the replacement:

let semaphore_acquisition = self.sema.acquire(process.concurrency_available);
let permit = in_workunit!(
    "acquire_command_runner_slot",
    // TODO: The UI uses the presence of a blocked workunit below a parent as an indication that
    // the parent is blocked. If this workunit is filtered out, parents nodes which are waiting
    // for the semaphore will render, even though they are effectively idle.
    //
    // https://github.com/pantsbuild/pants/issues/14680 will likely allow for a more principled
    // solution to this problem, such as removing the mutable `blocking` flag, and then never
    // filtering blocked workunits at creation time, regardless of level.
    Level::Debug,
    |workunit| async move {
        let _blocking_token = workunit.blocking();
        semaphore_acquisition.await
    }
)
.await;

loop {
    let mut process = process.clone();
    let concurrency_available = permit.concurrency();
    log::debug!(
        "Running {} under semaphore with concurrency id: {}, and concurrency: {}",
        process.description,
        permit.concurrency_slot(),
        concurrency_available,
    );

    // TODO: Both of these templating cases should be implemented at the lowest possible level:
    // they might currently be applied above a cache.
    if let Some(ref execution_slot_env_var) = process.execution_slot_variable {
        process.env.insert(
            execution_slot_env_var.clone(),
            format!("{}", permit.concurrency_slot()),
        );
    }

    if process.concurrency_available > 0 {
        let concurrency = format!("{}", permit.concurrency());
        let mut matched = false;
        process.argv = std::mem::take(&mut process.argv)
            .into_iter()
            .map(
                |arg| match CONCURRENCY_TEMPLATE_RE.replace_all(&arg, &concurrency) {

So, I'm gonna revert this part of the change. Thanks.

#20795

**complete_pex_env.environment_dict(python=pex.python),
**request.extra_env,
}
@@ -1196,7 +1205,7 @@ class VenvPexProcess:
level: LogLevel
input_digest: Digest | None
working_directory: str | None
extra_env: FrozenDict[str, str] | None
extra_env: FrozenDict[str, str]
output_files: tuple[str, ...] | None
output_directories: tuple[str, ...] | None
timeout_seconds: int | None
@@ -1229,7 +1238,7 @@ def __init__(
object.__setattr__(self, "level", level)
object.__setattr__(self, "input_digest", input_digest)
object.__setattr__(self, "working_directory", working_directory)
object.__setattr__(self, "extra_env", FrozenDict(extra_env) if extra_env else None)
object.__setattr__(self, "extra_env", FrozenDict(extra_env or {}))
object.__setattr__(self, "output_files", tuple(output_files) if output_files else None)
object.__setattr__(
self, "output_directories", tuple(output_directories) if output_directories else None
@@ -1252,6 +1261,12 @@ async def setup_venv_pex_process(
else venv_pex.pex.argv0
)
argv = (pex_bin, *request.argv)
env = {
# Set this in case this PEX was built with --no-pre-install-wheels, and thus parallelising
# the install on cold boot is handy.
"PEX_MAX_INSTALL_JOBS": str(request.concurrency_available),
**request.extra_env,
}
input_digest = (
await Get(Digest, MergeDigests((venv_pex.digest, request.input_digest)))
if request.input_digest
@@ -1270,7 +1285,7 @@ async def setup_venv_pex_process(
level=request.level,
input_digest=input_digest,
working_directory=request.working_directory,
env=request.extra_env,
env=env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches=append_only_caches,
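A small aside on the env construction in setup_pex_process and setup_venv_pex_process above: because the PEX_MAX_INSTALL_JOBS default is spliced in before **request.extra_env, a caller-supplied value still wins. A self-contained illustration of that dict-merge ordering (the values here are made up):

defaulted = {"PEX_MAX_INSTALL_JOBS": "4"}
extra_env = {"PEX_MAX_INSTALL_JOBS": "1", "TWINE_USERNAME": "whoami"}

# Later keys win in a ** merge, so the caller's extra_env overrides the default.
merged = {**defaulted, **extra_env}
assert merged == {"PEX_MAX_INSTALL_JOBS": "1", "TWINE_USERNAME": "whoami"}
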
2 changes: 1 addition & 1 deletion src/python/pants/backend/python/util_rules/pex_cli.py
@@ -40,7 +40,7 @@ class PexCli(TemplatedExternalTool):

default_version = "v2.3.0"
default_url_template = "https://github.com/pex-tool/pex/releases/download/{version}/pex"
version_constraints = ">=2.1.161,<3.0"
version_constraints = ">=2.3.0,<3.0"

@classproperty
def default_known_versions(cls):