
Commit 113042d

Add Kubernetes example
This example starts a fairly complete Kubernetes cluster and showcases
perfectly reproducible remote execution via the local remote execution
toolchain containers. The example uses a three-layer setup process:

1. The infra layer is a kind cluster with Cilium and MetalLB. This layer is
   built to be easily swappable with more "production grade" clusters.
2. The operations layer deploys a few standard applications that are not
   inherently required for NativeLink, but are solid deployments one would
   likely want running in a cluster. This includes monitoring and handling
   image availability.
3. The application layer is a straightforward `kubectl apply -k .` which
   deploys a NativeLink CAS, Worker and Scheduler.

This deployment differs from the Docker Compose setup in that it does not
make use of any system paths and doesn't allow visibility "outside" of the
node itself. That is, it's a hard requirement that the worker image is
self-contained. Storage is fully ephemeral in this example, and a
`kubectl delete -k .` destroys the cache for quick iterations and cache
testing.
1 parent 22b5cd8 commit 113042d

18 files changed (+800, -1 lines)
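In short, the example boils down to three commands, one per layer. The
scripts live in `deployment-examples/kubernetes`, as the CI workflow below
assumes:

```bash
cd deployment-examples/kubernetes

./00_infra.sh        # Infra layer: kind cluster with Cilium and MetalLB.
./01_operations.sh   # Operations layer: gateways plus images in a local registry.
kubectl apply -k .   # Application layer: NativeLink CAS, Worker and Scheduler.

# Storage is fully ephemeral; this drops the cache for quick iterations:
kubectl delete -k .
```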

.bazelrc (+7)

```diff
@@ -60,3 +60,10 @@ build:windows --enable_runfiles
 build:lre --incompatible_enable_cc_toolchain_resolution
 build:lre --define=EXECUTOR=remote
 build:lre --action_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
+
+# Flags for integration tests running on Kubernetes.
+
+build:k8s --config=lre
+build:k8s --remote_instance_name=main
+build:k8s --remote_cache=grpc://172.20.255.200:50051
+build:k8s --remote_executor=grpc://172.20.255.201:50052
```
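These flags compose with the existing `lre` config; the two addresses are the
fixed gateway IPs that MetalLB assigns in `00_infra.sh` below. With them in
place, the invocation from this commit's README works as-is:

```bash
# Runs against the in-cluster remote cache and executor.
bazel test --config=k8s //:dummy_test
```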

.github/workflows/lre.yaml (+59)

```diff
@@ -38,3 +38,62 @@ jobs:
           --config=lre \
           --verbose_failures \
           //local-remote-execution/examples:hello_lre"
+
+  remote:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-22.04]
+    name: Remote / ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout
+        uses: >- # v4.1.1
+          actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Install Nix
+        uses: >- # v7
+          DeterminateSystems/nix-installer-action@5620eb4af6b562c53e4d4628c0b6e4f9d9ae8612
+
+      - name: Cache Nix derivations
+        uses: >- # Custom commit, last pinned at 2023-11-17.
+          DeterminateSystems/magic-nix-cache-action@a04e6275a6bea232cd04fc6f3cbf20d4cb02a3e1
+
+      - name: Start Kubernetes cluster
+        run: >
+          nix develop --impure --command
+          bash -c "cd deployment-examples/kubernetes \
+          && ./00_infra.sh \
+          && ./01_operations.sh \
+          && kubectl apply -k ."
+
+      - name: Print gateways
+        run: |
+          kubectl get gtw
+
+      - name: Get gateway IPs
+        id: gateway-ips
+        run: |
+          cache_ip=$(kubectl get gtw cache -o=jsonpath='{.status.addresses[0].value}')
+          echo "Cache IP: $cache_ip"
+          echo "cache_ip=$cache_ip" >> "$GITHUB_ENV"
+
+          scheduler_ip=$(kubectl get gtw scheduler -o=jsonpath='{.status.addresses[0].value}')
+          echo "Scheduler IP: $scheduler_ip"
+          echo "scheduler_ip=$scheduler_ip" >> "$GITHUB_ENV"
+
+      - name: Print gateways again
+        run: |
+          echo "Cache IP: $cache_ip"
+          echo "Scheduler IP: $scheduler_ip"
+
+      - name: Build hello_lre with LRE toolchain.
+        run: >
+          nix develop --impure --command
+          bash -c "bazel run \
+          --config=lre \
+          --remote_instance_name=main \
+          --remote_cache=grpc://$cache_ip:50051 \
+          --remote_executor=grpc://$scheduler_ip:50052 \
+          --verbose_failures \
+          //local-remote-execution/examples:hello_lre"
```
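The gateway addresses land in `$GITHUB_ENV`, so later steps see them as plain
environment variables. Outside CI the same lookup works by hand; a sketch
assuming the gateways are named `cache` and `scheduler` as in the workflow:

```bash
# Fetch the MetalLB-assigned gateway addresses.
cache_ip=$(kubectl get gtw cache -o=jsonpath='{.status.addresses[0].value}')
scheduler_ip=$(kubectl get gtw scheduler -o=jsonpath='{.status.addresses[0].value}')

# Point Bazel at them explicitly instead of the fixed IPs in `.bazelrc`.
bazel run \
  --config=lre \
  --remote_instance_name=main \
  --remote_cache=grpc://$cache_ip:50051 \
  --remote_executor=grpc://$scheduler_ip:50052 \
  //local-remote-execution/examples:hello_lre
```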
deployment-examples/kubernetes/00_infra.sh (new file, +127)

```bash
# This script sets up a local development cluster. It's roughly equivalent to
# a managed K8s setup.

# For ease of development and to save disk space we pipe a local container
# registry through to kind.
#
# See https://kind.sigs.k8s.io/docs/user/local-registry/.

reg_name='kind-registry'
reg_port='5001'
if [ "$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true)" != 'true' ]; then
  docker run \
    -d --restart=always -p "127.0.0.1:${reg_port}:5000" --network bridge --name "${reg_name}" \
    registry:2
fi

# Start a basic cluster. We use cilium's CNI and eBPF kube-proxy replacement.

cat <<EOF | kind create cluster --config -
---
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
- role: worker
networking:
  disableDefaultCNI: true
  kubeProxyMode: none
containerdConfigPatches:
- |-
  [plugins."io.containerd.grpc.v1.cri".registry]
    config_path = "/etc/containerd/certs.d"
EOF

# Enable the registry on the nodes.

REGISTRY_DIR="/etc/containerd/certs.d/localhost:${reg_port}"
for node in $(kind get nodes); do
  docker exec "${node}" mkdir -p "${REGISTRY_DIR}"
  cat <<EOF | docker exec -i "${node}" cp /dev/stdin "${REGISTRY_DIR}/hosts.toml"
[host."http://${reg_name}:5000"]
EOF
done

# Connect the registry to the cluster network.

if [ "$(docker inspect -f='{{json .NetworkSettings.Networks.kind}}' "${reg_name}")" = 'null' ]; then
  docker network connect "kind" "${reg_name}"
fi

# Advertise the registry location.

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: local-registry-hosting
  namespace: kube-public
data:
  localRegistryHosting.v1: |
    host: "localhost:${reg_port}"
    help: "https://kind.sigs.k8s.io/docs/user/local-registry/"
EOF

# Prepare Gateway API CRDs. These MUST be available before we start cilium.

kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.0.0/experimental-install.yaml

kubectl wait --for condition=Established crd/gatewayclasses.gateway.networking.k8s.io
kubectl wait --for condition=Established crd/gateways.gateway.networking.k8s.io
kubectl wait --for condition=Established crd/httproutes.gateway.networking.k8s.io
kubectl wait --for condition=Established crd/tlsroutes.gateway.networking.k8s.io
kubectl wait --for condition=Established crd/grpcroutes.gateway.networking.k8s.io
kubectl wait --for condition=Established crd/referencegrants.gateway.networking.k8s.io

# Start cilium.

helm repo add cilium https://helm.cilium.io

helm upgrade \
  --install cilium cilium/cilium \
  --version 1.15.0-pre.3 \
  --namespace kube-system \
  --set k8sServiceHost=kind-control-plane \
  --set k8sServicePort=6443 \
  --set kubeProxyReplacement=strict \
  --set gatewayAPI.enabled=true \
  --wait

# Set up MetalLB. Kind's nodes are containers running on the local docker
# network. We reuse that network for LB-IPAM so that LoadBalancers are
# available via "real" local IPs.

KIND_NET_CIDR=$(docker network inspect kind -f '{{(index .IPAM.Config 0).Subnet}}')

# Carve a range out of the top of the kind subnet. The exact sed replacements
# are assumed here (mapping e.g. 172.20.0.0/16 to the range
# 172.20.255.200-172.20.255.250), consistent with the fixed gateway IPs in
# `.bazelrc`.
METALLB_IP_START=$(echo ${KIND_NET_CIDR} | sed "s@0.0/16@255.200@")
METALLB_IP_END=$(echo ${KIND_NET_CIDR} | sed "s@0.0/16@255.250@")
METALLB_IP_RANGE="${METALLB_IP_START}-${METALLB_IP_END}"

helm install --namespace metallb-system --create-namespace \
  --repo https://metallb.github.io/metallb metallb metallb \
  --version 0.13.12 \
  --wait

cat <<EOF | kubectl apply -f -
---
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: l2-ip
  namespace: metallb-system
spec:
  ipAddressPools:
  - default-pool
---
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: default-pool
  namespace: metallb-system
spec:
  addresses:
  - ${METALLB_IP_RANGE}
EOF

# At this point we have a similar setup to the one that we'd get with a cloud
# provider. Move on to `01_operations.sh` for the cluster setup.
```
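Before moving on, it can be worth confirming that the cluster came up as
intended. A few read-only checks; the Cilium pod label is an assumption based
on the chart's defaults:

```bash
# All three kind nodes should be Ready.
kubectl get nodes -o wide

# Cilium agents (CNI and kube-proxy replacement) should be Running.
kubectl -n kube-system get pods -l k8s-app=cilium

# The docker network CIDR that the MetalLB range is carved from.
docker network inspect kind -f '{{(index .IPAM.Config 0).Subnet}}'
```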
deployment-examples/kubernetes/01_operations.sh (new file, +26)

```bash
# This script configures a cluster with a few standard deployments.

# TODO(aaronmondal): Add Grafana, OpenTelemetry and the various other standard
# deployments one would expect in a cluster.

kubectl apply -f gateway.yaml

IMAGE_TAG=$(nix eval .#image.imageTag --raw)

$(nix build .#image --print-build-logs --verbose) \
  && ./result \
  | skopeo \
    copy \
    --dest-tls-verify=false \
    docker-archive:/dev/stdin \
    docker://localhost:5001/native-link:local

IMAGE_TAG=$(nix eval .#lre.imageTag --raw)

$(nix build .#lre --print-build-logs --verbose) \
  && ./result \
  | skopeo \
    copy \
    --dest-tls-verify=false \
    docker-archive:/dev/stdin \
    docker://localhost:5001/native-link-toolchain:local
```
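To verify that both images actually reached the registry, query its standard
v2 API; `localhost:5001` is the registry started in `00_infra.sh`:

```bash
# Expect "native-link" and "native-link-toolchain" in the catalog.
curl http://localhost:5001/v2/_catalog

# Expect the "local" tag pushed above.
curl http://localhost:5001/v2/native-link/tags/list
```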
deployment-examples/kubernetes/README.md (new file, +39)

# Kubernetes example

This deployment sets up a 3-container deployment with separate CAS, scheduler
and worker. Don't use this example deployment in production. It's insecure.

In this example we're using `kind` to set up the cluster and `cilium` with
`metallb` to provide a `LoadBalancer` and `GatewayController`.

First set up a local development cluster:

```
./00_infra.sh
```

Next start a few standard deployments. This part also builds the remote
execution containers and makes them available to the cluster:

```
./01_operations.sh
```

Finally deploy NativeLink:

```
kubectl apply -k .
```

Now you can use the `k8s` configuration for Bazel to use the exposed remote
cache and executor:

```
bazel test --config=k8s //:dummy_test
```

When you're done testing, delete the cluster:

```
kind delete cluster
```
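The fixed addresses baked into the `k8s` config only hold if MetalLB handed
the gateways the expected IPs, so it's worth a quick check before running the
Bazel command above:

```bash
# The ADDRESS column should show 172.20.255.200 (cache) and
# 172.20.255.201 (scheduler), matching `.bazelrc`.
kubectl get gtw cache scheduler
```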
NativeLink CAS configuration (new file, +100)

```json5
// This configuration places objects in various folders in
// `~/.cache/native-link`. It stores all data on disk and allows for restarts
// of the underlying service. It is optimized so that objects are compressed
// and deduplicated, and it uses some in-memory optimizations for certain hot
// paths.
{
  "stores": {
    "CAS_MAIN_STORE": {
      "verify": {
        "backend": {
          "compression": {
            "compression_algorithm": {
              "LZ4": {}
            },
            "backend": {
              "filesystem": {
                "content_path": "~/.cache/native-link/content_path-cas",
                "temp_path": "~/.cache/native-link/tmp_path-cas",
                "eviction_policy": {
                  // 10gb.
                  "max_bytes": 10000000000,
                }
              }
            }
          }
        },
        "verify_size": true,
        "verify_hash": true
      }
    },
    "AC_MAIN_STORE": {
      "filesystem": {
        "content_path": "~/.cache/native-link/content_path-ac",
        "temp_path": "~/.cache/native-link/tmp_path-ac",
        "eviction_policy": {
          // 500mb.
          "max_bytes": 500000000,
        }
      }
    }
  },
  "servers": [{
    "listen_address": "0.0.0.0:50051",
    "services": {
      "cas": {
        "main": {
          "cas_store": "CAS_MAIN_STORE"
        }
      },
      "ac": {
        "main": {
          "ac_store": "AC_MAIN_STORE"
        }
      },
      "capabilities": {},
      "bytestream": {
        "cas_stores": {
          "main": "CAS_MAIN_STORE",
        },
        // According to https://github.com/grpc/grpc.github.io/issues/371 16KiB - 64KiB is optimal.
        "max_bytes_per_stream": 64000, // 64kb.
      }
    }
  }, {
    // Only publish metrics on a private port.
    "listen_address": "0.0.0.0:50061",
    "services": {
      "prometheus": {
        "path": "/metrics"
      }
    }
  },
  {
    "listen_address": "0.0.0.0:50071",
    "tls": {
      "cert_file": "/root/example-do-not-use-in-prod-rootca.crt",
      "key_file": "/root/example-do-not-use-in-prod-key.pem"
    },
    "services": {
      "cas": {
        "main": {
          "cas_store": "CAS_MAIN_STORE"
        }
      },
      "ac": {
        "main": {
          "ac_store": "AC_MAIN_STORE"
        }
      },
      "capabilities": {},
      "bytestream": {
        "cas_stores": {
          "main": "CAS_MAIN_STORE",
        },
        // According to https://github.com/grpc/grpc.github.io/issues/371 16KiB - 64KiB is optimal.
        "max_bytes_per_stream": 64000, // 64kb.
      }
    }
  }]
}
```
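As a quick smoke test that the CAS answers on its public port, one can query
the Remote Execution API's capabilities service. A sketch using `grpcurl`; it
assumes the server exposes gRPC reflection and that the cache gateway sits at
the `.bazelrc` address:

```bash
# Query REAPI capabilities of the deployed CAS (requires server reflection).
grpcurl -plaintext \
  -d '{"instance_name": "main"}' \
  172.20.255.200:50051 \
  build.bazel.remote.execution.v2.Capabilities/GetCapabilities
```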
