Skip to content

Commit 3396946

Browse files
committed
Add prefill heavy e2e benchmarking test to github actions.
1 parent 5033e19 commit 3396946

File tree

2 files changed

+283
-1
lines changed

2 files changed

+283
-1
lines changed
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
name: GKE Prefill Heavy Test
# Deploys the inference stack to GKE and runs the prefill-heavy benchmark.

on:
  # Comment trigger: a pull-request comment containing /run-gke-prefill-heavy.
  # The job-level `if` decides whether a given comment actually starts a run.
  issue_comment:
    types:
      - created
  # Manual trigger; the caller chooses what to test.
  workflow_dispatch:
    inputs:
      pr_or_branch:
        description: "Pull-request number or branch name to test"
        required: true
        default: "main"
        type: string

# The workflow token is limited to reading repository contents.
permissions:
  contents: read
17+
18+
jobs:
  deploy_and_validate:
    # Runs on manual dispatch, or on a pull-request comment that contains the
    # trigger phrase and was written by an OWNER, MEMBER or COLLABORATOR.
    # NOTE(review): the issue_comment payload may not include
    # issue.pull_request.base.ref; if it is absent, the comparison with 'main'
    # is always false and the comment trigger can never fire — confirm.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (
        github.event_name == 'issue_comment' &&
        github.event.issue.pull_request &&
        github.event.issue.pull_request.base.ref == 'main' &&
        contains(github.event.comment.body, '/run-gke-prefill-heavy') &&
        (
          github.event.comment.author_association == 'OWNER' ||
          github.event.comment.author_association == 'MEMBER' ||
          github.event.comment.author_association == 'COLLABORATOR'
        )
      )
    name: Test on ${{ matrix.accelerator.name }}
    runs-on: ubuntu-latest

    strategy:
      # One matrix entry at a time; a failing entry does not cancel the others.
      fail-fast: false
      max-parallel: 1
      matrix:
        accelerator:
          - name: GPU
            # Extra sleep applied after the pods report Ready.
            pod_readiness_sleep_seconds: 180

    env:
      # Target cluster.
      GCP_PROJECT_ID: llm-d-scale
      GKE_CLUSTER_NAME: llm-d-e2e-us-east5
      GKE_CLUSTER_ZONE: us-east5
      NAMESPACE: igw-prefill-heavy
      # Gateway settings.
      GATEWAY: gke-l7-regional-external-managed
      GATEWAY_TYPE: gke
      # First non-empty value wins: dispatch input, comment's PR number,
      # event number, then the literal 'actions'.
      PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number || github.event.number || 'actions' }}
      # Model under test and the token passed to the benchmark chart.
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      MODEL: meta-llama/Llama-3.1-8B-Instruct
      # Service accounts and bucket used for benchmark results.
      GSA_EMAIL: ${{ secrets.GCS_WORKLOAD_SA }}
      GCS_BUCKET: igw-e2e-benchmark-results
      KSA_NAME: igw-e2e-benchmark-sa
59+
steps:
60+
- name: Checkout
  # No `ref` is given, so the action checks out its default ref for the event.
  # Credentials are not persisted into the local git configuration.
  uses: actions/checkout@v4
  with:
    persist-credentials: false
64+
65+
- name: Determine if pr_or_branch is a PR number
  id: check_pr
  env:
    # Resolve the target for every trigger, not only workflow_dispatch:
    #   workflow_dispatch -> inputs.pr_or_branch
    #   issue_comment     -> issue.number (the PR the comment was posted on)
    #   pull_request      -> pull_request.number (kept in case that trigger is added)
    # Previously only the dispatch input was read here, so a comment-triggered
    # run exported "actions" and then tried to check out a branch of that name.
    # Passing event data through env also keeps it out of the script text.
    PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number || github.event.pull_request.number }}
  shell: bash
  run: |
    # Fall back to the historical default when no target could be resolved.
    PR_OR_BRANCH="${PR_OR_BRANCH:-actions}"
    # Export the resolved value for all later steps.
    echo "PR_OR_BRANCH=${PR_OR_BRANCH}" >> "$GITHUB_ENV"
    # A purely numeric target is a pull-request number; anything else is a branch.
    if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then
      echo "is_pr=true" >> "$GITHUB_OUTPUT"
    else
      echo "is_pr=false" >> "$GITHUB_OUTPUT"
    fi
80+
81+
- name: Fetch and checkout PR
  # Runs only when the target was classified as a pull-request number.
  if: ${{ steps.check_pr.outputs.is_pr == 'true' }}
  run: |
    # Fetch the PR head into a local branch named pr-<number>, then switch to it.
    pr_branch="pr-${PR_OR_BRANCH}"
    git fetch origin "pull/${PR_OR_BRANCH}/head:${pr_branch}"
    git checkout "${pr_branch}"
86+
87+
- name: Checkout branch
  # Runs only when the target was classified as a branch name.
  if: steps.check_pr.outputs.is_pr == 'false'
  run: |
    # The initial checkout is shallow and only contains the ref the workflow
    # started on, so any other branch must be fetched before it can be checked
    # out. The fetch is skipped when the branch already exists locally.
    if ! git rev-parse --verify --quiet "refs/heads/${PR_OR_BRANCH}" >/dev/null; then
      git fetch --depth=1 origin "+refs/heads/${PR_OR_BRANCH}:refs/remotes/origin/${PR_OR_BRANCH}"
    fi
    git checkout "$PR_OR_BRANCH"
90+
91+
- name: Authenticate to Google Cloud
  id: auth
  # Action pinned to a full commit SHA; authenticates with a service-account key.
  uses: google-github-actions/auth@b7593ed2efd1c1617e1b0254da33b86225adb2a5
  with:
    credentials_json: ${{ secrets.GCP_SA_KEY }}

- name: Set up gcloud CLI and kubectl
  # Action pinned to a full commit SHA; also installs kubectl and the GKE auth plugin.
  uses: google-github-actions/setup-gcloud@cb1e50a9932213ecece00a606661ae9ca44f3397
  with:
    project_id: ${{ env.GCP_PROJECT_ID }}
    install_components: "kubectl,gke-gcloud-auth-plugin"

- name: Get GKE credentials
  # Cluster name and location come from the job-level environment.
  run: |
    gcloud container clusters get-credentials "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}"
106+
107+
- name: Create namespace
  run: |
    # Idempotent create: render the Namespace client-side and apply it.
    # The previous `create || echo "Namespace already exists"` reported every
    # failure (auth, connectivity, quota) as "already exists" and carried on.
    kubectl create namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f -
110+
111+
- name: Create hf-token secret
  run: |
    # HF_TOKEN comes from the job-level environment. Reading it from the
    # environment, rather than expanding the secret expression into the script
    # text, keeps the token out of the generated script and lets the shell
    # handle quoting. Render client-side and apply so re-runs update the secret
    # in place.
    kubectl create secret generic hf-token \
      --from-literal="token=${HF_TOKEN}" \
      --namespace "${NAMESPACE}" \
      --dry-run=client -o yaml | kubectl apply -f -
117+
118+
- name: Create and Annotate KSA for Workload Identity
  run: |
    # Fail early with a clear message if the service-account e-mail is missing;
    # otherwise the annotation would silently be written with an empty value.
    : "${GSA_EMAIL:?GSA_EMAIL is empty; check the GCS_WORKLOAD_SA secret}"
    # Idempotent create of the Kubernetes service account.
    kubectl create serviceaccount "${KSA_NAME}" --namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f -
    # Link the Kubernetes service account to the Google service account.
    kubectl annotate serviceaccount "${KSA_NAME}" \
      "iam.gke.io/gcp-service-account=${GSA_EMAIL}" \
      --overwrite \
      --namespace "${NAMESPACE}"
125+
126+
- name: Deploy Model Server and CRDs
  run: |
    # Without an explicit `shell`, run steps use `bash -e` with no pipefail,
    # so a failing `kubectl apply` piped into `tee` would go unnoticed.
    set -o pipefail
    cd config/manifests/vllm
    echo "Deploying Model Server..."
    # First write to the deployment log in this job: start the file fresh.
    kubectl apply -f gpu-deployment.yaml -n "${NAMESPACE}" | tee ~/igw-prefill-heavy-deployment.log
    echo "Installing CRDs"
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.1.0/manifests.yaml
    echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log
134+
135+
- name: Deploy InferencePool and Endpoint Picker Extension
  run: |
    # Without an explicit `shell`, run steps use `bash -e` with no pipefail,
    # so a failing `helm install` piped into `tee` would go unnoticed.
    set -o pipefail
    export IGW_CHART_VERSION=v1.1.0
    # Append (-a) to the shared deployment log; plain `tee` truncated the file
    # and discarded what the earlier deployment step had written.
    helm install vllm-llama3-8b-instruct \
      --namespace "${NAMESPACE}" \
      --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
      --set provider.name="${GATEWAY_TYPE}" \
      --version "${IGW_CHART_VERSION}" \
      oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool | tee -a ~/igw-prefill-heavy-deployment.log
    echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log
145+
146+
- name: Wait for all pods to be ready
  env:
    # Per-accelerator grace period taken from the matrix entry.
    READINESS_SLEEP_SECONDS: ${{ matrix.accelerator.pod_readiness_sleep_seconds }}
  run: |
    # Block until every pod in the namespace reports Ready (25 minutes at most).
    kubectl wait pod --all --for=condition=Ready --namespace "${NAMESPACE}" --timeout=25m
    # TODO: remove this once examples have readiness probes
    sleep "${READINESS_SLEEP_SECONDS}"
    echo "✅ All pods are ready."
    kubectl get pods --namespace "${NAMESPACE}"
156+
157+
- name: Deploy Gateway
  run: |
    # Without an explicit `shell`, run steps use `bash -e` with no pipefail,
    # so a failing `kubectl apply` piped into `tee` would go unnoticed.
    set -o pipefail
    GATEWAY_NAME=inference-gateway
    # Remove leftovers from a previous run before re-creating them.
    kubectl delete httproute llm-route -n "${NAMESPACE}" --ignore-not-found
    kubectl delete gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" --ignore-not-found
    # Append (-a) to the shared deployment log; plain `tee` truncated the file
    # on every call, so only the last command's output survived.
    echo "Deploying Gateway..."
    kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/gateway.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefill-heavy-deployment.log
    echo "Deploying HTTPRoute..."
    kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/httproute.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefill-heavy-deployment.log
    echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log
167+
168+
- name: Wait for gateway to be ready
  env:
    GATEWAY_NAME: inference-gateway
  run: |
    # Block until the Gateway reports Programmed=True (500 seconds at most).
    kubectl wait "gateway/${GATEWAY_NAME}" --for=condition=Programmed=True --namespace "${NAMESPACE}" --timeout=500s
    echo "✅ Gateway is ready."
    kubectl get gateway --namespace "${NAMESPACE}"
177+
178+
- name: Show deployment status
  run: |
    # Prints a section header in the form "=== <title> ===".
    section() { echo "=== $1 ==="; }

    # The first three listings must succeed; a failure fails the step.
    section "Deployments"
    kubectl get deployments -n "${NAMESPACE}"
    echo ""
    section "Pods"
    kubectl get pods -n "${NAMESPACE}"
    echo ""
    section "Services"
    kubectl get svc -n "${NAMESPACE}"
    echo ""

    # The remaining listings are informational; errors are ignored.
    section "Helm releases"
    helm list -n "${NAMESPACE}" || true
    echo ""
    section "Inference Pools"
    kubectl get inferencepools -n "${NAMESPACE}" || true
    echo ""
    section "HTTPRoutes"
    kubectl get httproutes -n "${NAMESPACE}" -o yaml || true
    echo ""
    section "Gateway"
    kubectl get Gateway -n "${NAMESPACE}" || true
    echo ""
201+
202+
- name: Verify installation and run validation test
  # Run the validation script from its own directory.
  working-directory: .github/scripts/e2e
  run: |
    ./e2e-validate.sh -n "${NAMESPACE}" -v -m ${MODEL}
206+
207+
- name: Run benchmarking test
  working-directory: benchmarking/single-workload
  run: |
    # Timestamp used as the per-run results folder inside the bucket.
    run_stamp=$(date +"%Y-%m-%d-%H-%M-%S")

    # Prefer an explicitly provided GATEWAY_HOST; otherwise use the first
    # address of the first Gateway found in the namespace.
    gateway_addr="${GATEWAY_HOST:-}"
    if [[ -z "$gateway_addr" ]]; then
      gateway_addr=$(kubectl get gateway -n "$NAMESPACE" \
        -o jsonpath='{.items[0].status.addresses[0].value}' 2>/dev/null || true)
    fi
    if [[ -z "$gateway_addr" ]]; then
      echo "Error: could not discover a Gateway address in namespace '$NAMESPACE'." >&2
      exit 1
    fi
    gateway_port=80
    endpoint="${gateway_addr}:${gateway_port}"

    # Install the benchmark chart with the prefill-heavy values plus
    # run-specific overrides.
    helm install prefill-heavy-benchmark ../inference-perf/ \
      -f prefill-heavy-values.yaml \
      --namespace "${NAMESPACE}" \
      --create-namespace \
      --set hfToken="${HF_TOKEN}" \
      --set "config.server.base_url=http://${endpoint}" \
      --set "job.serviceAccountName=$KSA_NAME" \
      --set "job.image.tag=v0.2.0" \
      --set "config.storage.google_cloud_storage.bucket_name=${GCS_BUCKET}" \
      --set "config.storage.google_cloud_storage.path=${NAMESPACE}/${run_stamp}" \
      --set "gcsPath=gs://${GCS_BUCKET}/datasets/billsum_conversations.json" \
      --set "config.data.path=/gcsDataset/gcs-dataset.json" \
      --set-string 'job.resources.limits.nvidia\.com/gpu=1'
231+
232+
- name: Wait for benchmarking job to finish
  run: |
    job_name=prefill-heavy-benchmark-inference-perf-job
    TIMEOUT_DURATION="7200s"

    # Watch for both terminal conditions at once. Waiting only for "complete"
    # keeps the step blocked for the whole timeout when the Job has already
    # failed.
    kubectl wait --for=condition=complete job/"$job_name" -n "$NAMESPACE" --timeout="$TIMEOUT_DURATION" &
    kubectl wait --for=condition=failed job/"$job_name" -n "$NAMESPACE" --timeout="$TIMEOUT_DURATION" &
    # Return as soon as either watcher exits, then stop the other one.
    wait -n || true
    kill $(jobs -p) 2>/dev/null || true

    # Read the final state from the Job itself instead of inferring it from
    # whichever watcher happened to exit first.
    complete=$(kubectl get job "$job_name" -n "$NAMESPACE" \
      -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}' 2>/dev/null || true)
    if [[ "$complete" != "True" ]]; then
      echo "Error: Benchmark job $job_name did not complete successfully within $TIMEOUT_DURATION." >&2
      echo "--- Job Description ---" >&2
      kubectl describe job "$job_name" -n "$NAMESPACE" >&2
      echo "--- Pod Logs (Last 50 lines) ---" >&2
      kubectl logs -l job-name="$job_name" -n "$NAMESPACE" --all-containers=true --tail 50 >&2
      exit 1
    fi
    echo "✅ Benchmarking Job Completed."
245+
246+
- name: Collect and upload Kubernetes pod logs
  if: always()
  run: |
    # Best-effort diagnostics: one pod whose logs cannot be read (for example
    # a container that never started) must not stop collection for the others.
    # Previously such a failure made xargs exit non-zero and `bash -e` aborted
    # the step before descriptions and the deployment log were gathered.
    mkdir -p pod-logs-inference-prefill-heavy
    cd pod-logs-inference-prefill-heavy
    pods=$(kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" || true)
    echo "Fetching ${NAMESPACE} pods log..."
    for pod in ${pods}; do
      kubectl logs --all-containers=true -n "${NAMESPACE}" "${pod}" > "${pod}.log" 2>&1 || true
    done
    echo "Fetching ${NAMESPACE} pods descriptions..."
    for pod in ${pods}; do
      kubectl describe pod -n "${NAMESPACE}" "${pod}" > "${pod}-describe.log" 2>&1 || true
    done
    # Move in the deployment logs if they exist; a missing file is not an error.
    mv ~/igw-prefill-heavy-deployment.log . || true
    mv ~/install-deps.log . || true
259+
260+
- name: Upload pod logs as artifact
  # Runs regardless of earlier step results; the artifact is named per accelerator.
  if: ${{ always() }}
  uses: actions/upload-artifact@v4
  with:
    name: "igw-pod-logs-inference-prefill-heavy-${{ matrix.accelerator.name }}"
    path: pod-logs-inference-prefill-heavy
266+
267+
- name: Send Google Chat notification on failure
  # Runs only when an earlier step failed; action pinned to a full commit SHA.
  if: ${{ failure() }}
  uses: SimonScholz/google-chat-action@3b3519e5102dba8aa5046fd711c4b553586409bb
  with:
    webhookUrl: ${{ secrets.GOOGLE_CHAT_WEBHOOK }}
    jobStatus: ${{ job.status }}
    title: "${{ github.workflow }} - ${{ matrix.accelerator.name }}"
274+
275+
- name: Cleanup deployment
  if: always()
  run: |
    # Attempt every cleanup command even if an earlier one fails. Under
    # `bash -e` the first failure used to skip the remaining deletions and
    # leave resources behind. The step still ends non-zero if any command
    # failed.
    GATEWAY_NAME=inference-gateway
    rc=0
    helm uninstall vllm-llama3-8b-instruct -n "${NAMESPACE}" --ignore-not-found || rc=1
    helm uninstall prefill-heavy-benchmark -n "${NAMESPACE}" --ignore-not-found || rc=1
    kubectl delete httproute llm-route -n "${NAMESPACE}" --ignore-not-found || rc=1
    kubectl delete gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" --ignore-not-found || rc=1
    exit "${rc}"

benchmarking/single-workload/prefill-heavy-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ logLevel: INFO
2020
# A GCS bucket path that points to the dataset file.
2121
# The file will be copied from this path to the local file system
2222
# at /dataset/dataset.json for use during the run.
23-
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/dataset.json.
23+
# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /gcsDataset/gcs-dataset.json.
2424
gcsPath: ""
2525

2626
# An S3 bucket path that points to the dataset file.

0 commit comments

Comments
 (0)