Skip to content

Commit

Permalink
[Breaking] Decouple nativelink from toolchain containers
Browse files Browse the repository at this point in the history
Changes to `nativelink` no longer require rebuilding toolchain
containers and vice versa.

The `nativelink` executable has been removed from the `createWorker`
function. Instead, users should mount the `nativelink` executable into a
toolchain container during deployment. This makes deployments more
generic and provides out-of-the-box compatibility with any arbitrary
toolchain container.

Introduce a new `nativelink-worker-init` image, a thin wrapper around
the `nativelink` container that copies the bundled `nativelink`
executable to a specified location. This can be used in worker deployments
to populate temporary volumes with the `nativelink` executable. The new
setup significantly improves setup times (observed 90%+) for LRE-style
deployments as workflows can fetch the `nativelink-worker-init`
container instead of rebuilding the executable from scratch.
  • Loading branch information
aaronmondal committed Jun 19, 2024
1 parent c85b6df commit feadd39
Show file tree
Hide file tree
Showing 15 changed files with 185 additions and 95 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
image: [image, nativelink-worker-lre-cc]
image: [image, nativelink-worker-init, nativelink-worker-lre-cc]
name: Publish ${{ matrix.image }}
runs-on: large-ubuntu-22.04
permissions:
Expand Down
10 changes: 9 additions & 1 deletion .github/workflows/lre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ jobs:
--verbose_failures \
@local-remote-execution//examples:hello_lre"
# TODO(nativelink#986) Re-enable once LRE is no longer flaky.
# remote:
# strategy:
# fail-fast: false
Expand All @@ -66,6 +65,15 @@ jobs:
# uses: >- # v10
# DeterminateSystems/nix-installer-action@de22e16c4711fca50c816cc9081563429d1cf563

# - name: Free disk space
# uses: >- # v2.0.0
# endersonmenezes/free-disk-space@3f9ec39ebae520864ac93467ee395f5237585c21
# with:
# remove_android: true
# remove_dotnet: true
# remove_haskell: true
# remove_tool_cache: false

# - name: Cache Nix derivations
# uses: >- # v4
# DeterminateSystems/magic-nix-cache-action@fc6aaceb40b9845a02b91e059ec147e78d1b4e41
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tagged_image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
strategy:
fail-fast: false
matrix:
image: [image, nativelink-worker-lre-cc]
image: [image, nativelink-worker-init, nativelink-worker-lre-cc]
runs-on: ubuntu-22.04
permissions:
packages: write
Expand Down
20 changes: 10 additions & 10 deletions deployment-examples/chromium/01_operations.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# This script configures a cluster with a few standard deployments.
#!/usr/bin/env bash

# TODO(aaronmondal): Add Grafana, OpenTelemetry and the various other standard
# deployments one would expect in a cluster.
# Trigger cluster-internal pipelines to build or fetch necessary images.

set -xeuo pipefail

SRC_ROOT=$(git rev-parse --show-toplevel)

# The image for the scheduler and CAS.
curl -v \
-H 'content-Type: application/json' \
-d '{"flakeOutput": "./src_root#image"}' \
localhost:8082/eventlistener

# Wrap it nativelink to turn it into a worker.
curl -v \
-H 'content-Type: application/json' \
-d '{"flakeOutput": "./src_root#nativelink-worker-init"}' \
localhost:8082/eventlistener

curl -v \
-H 'content-Type: application/json' \
-d '{"flakeOutput": "./src_root#nativelink-worker-siso-chromium"}' \
Expand All @@ -25,12 +25,12 @@ until kubectl get pipelinerun \
sleep 0.1
done

printf 'Waiting for PipelineRuns to finish...
printf "Waiting for PipelineRuns to finish...
You may cancel this script now and use `tkn pr ls` and `tkn pr logs -f` to
You may cancel this script now and use 'tkn pr ls' and 'tkn pr logs -f' to
monitor the PipelineRun logs.
'
"

kubectl wait \
--for=condition=Succeeded \
Expand Down
16 changes: 9 additions & 7 deletions deployment-examples/chromium/02_application.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Get the nix derivation hash from the toolchain container, change the
# `TOOLCHAIN_TAG` variable in the `worker.json.template` to that hash and apply
# the configuration.
#!/usr/bin/env bash

# Prepare the Kustomization and apply it to the cluster.

KUSTOMIZE_DIR=$(git rev-parse --show-toplevel)/deployment-examples/chromium

Expand All @@ -16,10 +16,12 @@ resources:
EOF

cd "$KUSTOMIZE_DIR" && kustomize edit set image \
nativelink=localhost:5001/nativelink:$(\
nix eval .#image.imageTag --raw) \
nativelink-worker-chromium=localhost:5001/nativelink-worker-siso-chromium:$(\
nix eval .#nativelink-worker-siso-chromium.imageTag --raw)
nativelink=localhost:5001/nativelink:"$(\
nix eval .#image.imageTag --raw)" \
nativelink-worker-init=localhost:5001/nativelink-worker-init:"$(\
nix eval .#nativelink-worker-init.imageTag --raw)" \
nativelink-worker-chromium=localhost:5001/nativelink-worker-siso-chromium:"$(\
nix eval .#nativelink-worker-siso-chromium.imageTag --raw)"
kubectl apply -k "$KUSTOMIZE_DIR"
Expand Down
2 changes: 2 additions & 0 deletions deployment-examples/chromium/03_build_chrome_tests.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env bash

set -euo pipefail

function fetch_chromium() {
Expand Down
9 changes: 4 additions & 5 deletions deployment-examples/chromium/04_delete_application.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Get the nix derivation hash from the toolchain container, change the
# `TOOLCHAIN_TAG` variable in the `worker.json.template` to that hash and delete
# the configuration.
#!/usr/bin/env bash

KUSTOMIZE_DIR=$(git rev-parse --show-toplevel)/deployment-examples/chromium
# Delete the Kustomization but leave the rest of the cluster intact.

kubectl delete -k "$KUSTOMIZE_DIR"
kubectl delete -k \
"$(git rev-parse --show-toplevel)/deployment-examples/chromium"
13 changes: 12 additions & 1 deletion deployment-examples/chromium/worker-chromium.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ spec:
labels:
app: nativelink-worker-chromium
spec:
initContainers:
- name: nativelink-worker-init
# This image will be edited by kustomize.
image: nativelink-worker-init
args: ["/shared/nativelink"]
volumeMounts:
- name: shared
mountPath: /shared

containers:
- name: nativelink-worker-chromium
# This image will be edited by kustomize.
Expand All @@ -28,9 +37,11 @@ spec:
- name: worker-config
mountPath: /worker.json
subPath: worker.json
command: ["/bin/nativelink"]
command: ["/shared/nativelink"]
args: ["/worker.json"]
volumes:
- name: shared
emptyDir: {}
- name: worker-config
configMap:
name: worker
18 changes: 10 additions & 8 deletions deployment-examples/kubernetes/01_operations.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# This script configures a cluster with a few standard deployments.
#!/usr/bin/env bash

# TODO(aaronmondal): Add Grafana, OpenTelemetry and the various other standard
# deployments one would expect in a cluster.
# Trigger cluster-internal pipelines to build or fetch necessary images.

set -xeuo pipefail

SRC_ROOT=$(git rev-parse --show-toplevel)

curl -v \
-H 'content-Type: application/json' \
-d '{"flakeOutput": "./src_root#image"}' \
localhost:8082/eventlistener

curl -v \
-H 'content-Type: application/json' \
-d '{"flakeOutput": "./src_root#nativelink-worker-init"}' \
localhost:8082/eventlistener

curl -v \
-H 'content-Type: application/json' \
-d '{"flakeOutput": "./src_root#nativelink-worker-lre-cc"}' \
Expand All @@ -23,12 +25,12 @@ until kubectl get pipelinerun \
sleep 0.1
done

printf 'Waiting for PipelineRuns to finish...
printf "Waiting for PipelineRuns to finish...
You may cancel this script now and use `tkn pr ls` and `tkn pr logs -f` to
You may cancel this script now and use 'tkn pr ls' and 'tkn pr logs -f' to
monitor the PipelineRun logs.
'
"

kubectl wait \
--for=condition=Succeeded \
Expand Down
16 changes: 9 additions & 7 deletions deployment-examples/kubernetes/02_application.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Get the nix derivation hash from the toolchain container, change the
# `TOOLCHAIN_TAG` variable in the `worker.json.template` to that hash and apply
# the configuration.
#!/usr/bin/env bash

# Prepare the Kustomization and apply it to the cluster.

KUSTOMIZE_DIR=$(git rev-parse --show-toplevel)/deployment-examples/kubernetes

Expand All @@ -18,10 +18,12 @@ resources:
EOF

cd "$KUSTOMIZE_DIR" && kustomize edit set image \
nativelink=localhost:5001/nativelink:$(\
nix eval .#image.imageTag --raw) \
nativelink-worker-lre-cc=localhost:5001/nativelink-worker-lre-cc:$(\
nix eval .#nativelink-worker-lre-cc.imageTag --raw) \
nativelink=localhost:5001/nativelink:"$(\
nix eval .#image.imageTag --raw)" \
nativelink-worker-init=localhost:5001/nativelink-worker-init:"$(\
nix eval .#nativelink-worker-init.imageTag --raw)" \
nativelink-worker-lre-cc=localhost:5001/nativelink-worker-lre-cc:"$(\
nix eval .#nativelink-worker-lre-cc.imageTag --raw)"
# TODO(aaronmondal): Fix java and add this:
# nativelink-worker-lre-java=localhost:5001/nativelink-worker-lre-java:$(\
Expand Down
9 changes: 4 additions & 5 deletions deployment-examples/kubernetes/03_delete_application.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Get the nix derivation hash from the toolchain container, change the
# `TOOLCHAIN_TAG` variable in the `worker.json.template` to that hash and delete
# the configuration.
#!/usr/bin/env bash

KUSTOMIZE_DIR=$(git rev-parse --show-toplevel)/deployment-examples/kubernetes
# Delete the Kustomization but leave the rest of the cluster intact.

kubectl delete -k "$KUSTOMIZE_DIR"
kubectl delete -k \
"$(git rev-parse --show-toplevel)/deployment-examples/kubernetes"
63 changes: 38 additions & 25 deletions deployment-examples/kubernetes/worker-lre-cc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,39 @@ spec:
app: nativelink-worker-lre-cc
spec:
initContainers:
- name: setup-entrypoint
image: nixpkgs/nix-flakes:latest
command: ["/bin/sh", "-c"]
# The kind setup mounts the nativelink repository into the kind nodes at
# `/mnt/src_root`. This ensures that the tags between the worker configs
# and bazel toolchains match when this setup is run in CI.
#
# WARNING: The platform is *not* necessarily the container that is
# actually deployed here. The generator container in this example was
# `rbe-autogen-lre-cc:<sometag>` and the platform was modified
# after the fact to be `lre-cc:<sometag>`. The deployed container
# we use as worker is
# `nativelink-worker-lre-cc:<some_potentially_other_tag>` which is a
# completely separate extension of the `lre-cc` base image.
args:
- |
NATIVELINK_WORKER_PLATFORM=docker://lre-cc:$(nix eval /mnt/src_root#lre-cc.imageTag --raw) &&
printf '#!/bin/sh\nexport NATIVELINK_WORKER_PLATFORM=%s\nexec "$@"' "$NATIVELINK_WORKER_PLATFORM" > /entrypoint/entrypoint.sh &&
chmod +x /entrypoint/entrypoint.sh
volumeMounts:
- name: entrypoint
mountPath: /entrypoint
- name: mnt
mountPath: /mnt
- name: setup-entrypoint
image: nixpkgs/nix-flakes:latest
command: ["/bin/sh", "-c"]
# The kind setup mounts the nativelink repository into the kind nodes
# at `/mnt/src_root`. This ensures that the tags between the worker
# configs and bazel toolchains match when this setup is run in CI.
#
# WARNING: The platform is *not* necessarily the container that is
# actually deployed here. The generator container in this example was
# `rbe-autogen-lre-cc:<sometag>` and the platform was modified
# after the fact to be `lre-cc:<sometag>`. The deployed container
# we use as worker is
# `nativelink-worker-lre-cc:<some_potentially_other_tag>` which is a
# completely separate extension of the `lre-cc` base image.
args:
- |
NATIVELINK_WORKER_PLATFORM=docker://lre-cc:$(nix eval /mnt/src_root#lre-cc.imageTag --raw) &&
printf '#!/bin/sh\nexport NATIVELINK_WORKER_PLATFORM=%s\nexec "$@"' "$NATIVELINK_WORKER_PLATFORM" > /entrypoint/entrypoint.sh &&
chmod +x /entrypoint/entrypoint.sh
volumeMounts:
- name: entrypoint
mountPath: /entrypoint
- name: mnt
mountPath: /mnt

- name: nativelink-worker-init
# This image will be edited by kustomize.
image: nativelink-worker-init
args: ["/shared/nativelink"]
volumeMounts:
- name: shared
mountPath: /shared

containers:
- name: nativelink-worker-lre-cc
# This image will be edited by kustomize.
Expand All @@ -55,9 +64,13 @@ spec:
subPath: worker.json
- name: entrypoint
mountPath: /entrypoint
- name: shared
mountPath: /shared
command: ["/entrypoint/entrypoint.sh"]
args: ["/bin/nativelink", "/worker.json"]
args: ["/shared/nativelink", "/worker.json"]
volumes:
- name: shared
emptyDir: {}
- name: entrypoint
emptyDir: {}
- name: worker-config
Expand Down
47 changes: 29 additions & 18 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@

publish-ghcr = import ./tools/publish-ghcr.nix {inherit pkgs;};

local-image-test = import ./tools/local-image-test.nix {inherit pkgs;};
local-image-test = import ./tools/local-image-test.nix {inherit pkgs nativelink;};

nativelink-is-executable-test = import ./tools/nativelink-is-executable-test.nix {inherit pkgs nativelink;};

Expand All @@ -150,8 +150,33 @@
inherit (nix2container.packages.${system}.nix2container) pullImage;
inherit (nix2container.packages.${system}.nix2container) buildImage;

nativelink-image = buildImage {
name = "nativelink";
copyToRoot = [
(pkgs.buildEnv {
name = "nativelink-buildEnv";
paths = [nativelink];
pathsToLink = ["/bin"];
})
];
config = {
Entrypoint = [(pkgs.lib.getExe' nativelink "nativelink")];
Labels = {
"org.opencontainers.image.description" = "An RBE compatible, high-performance cache and remote executor.";
"org.opencontainers.image.documentation" = "https://github.com/TraceMachina/nativelink";
"org.opencontainers.image.licenses" = "Apache-2.0";
"org.opencontainers.image.revision" = "${self.rev or self.dirtyRev or "dirty"}";
"org.opencontainers.image.source" = "https://github.com/TraceMachina/nativelink";
"org.opencontainers.image.title" = "NativeLink";
"org.opencontainers.image.vendor" = "Trace Machina, Inc.";
};
};
};

nativelink-worker-init = pkgs.callPackage ./tools/nativelink-worker-init.nix {inherit buildImage self nativelink-image;};

rbe-autogen = import ./local-remote-execution/rbe-autogen.nix {inherit pkgs nativelink buildImage llvmPackages;};
createWorker = import ./tools/create-worker.nix {inherit pkgs nativelink buildImage self;};
createWorker = import ./tools/create-worker.nix {inherit pkgs buildImage self;};
siso-chromium = buildImage {
name = "siso-chromium";
fromImage = pullImage {
Expand Down Expand Up @@ -203,7 +228,7 @@
};
};
packages = rec {
inherit publish-ghcr local-image-test nativelink-is-executable-test nativelink nativelink-debug native-cli lre-cc;
inherit publish-ghcr local-image-test nativelink-is-executable-test nativelink nativelink-debug native-cli lre-cc nativelink-worker-init;
default = nativelink;

rbe-autogen-lre-cc = rbe-autogen lre-cc;
Expand All @@ -213,21 +238,7 @@
nativelink-worker-lre-java = createWorker lre-java;
nativelink-worker-siso-chromium = createWorker siso-chromium;
nativelink-worker-toolchain-drake = createWorker toolchain-drake;
image = buildImage {
name = "nativelink";
config = {
Entrypoint = [(pkgs.lib.getExe' nativelink "nativelink")];
Labels = {
"org.opencontainers.image.description" = "An RBE compatible, high-performance cache and remote executor.";
"org.opencontainers.image.documentation" = "https://github.com/TraceMachina/nativelink";
"org.opencontainers.image.licenses" = "Apache-2.0";
"org.opencontainers.image.revision" = "${self.rev or self.dirtyRev or "dirty"}";
"org.opencontainers.image.source" = "https://github.com/TraceMachina/nativelink";
"org.opencontainers.image.title" = "NativeLink";
"org.opencontainers.image.vendor" = "Trace Machina, Inc.";
};
};
};
image = nativelink-image;
};
checks = {
# TODO(aaronmondal): Fix the tests.
Expand Down
Loading

0 comments on commit feadd39

Please sign in to comment.