diff --git a/.github/workflows/flash_attention.yml b/.github/workflows/flash_attention.yml
index c6fc6635..201aaff1 100644
--- a/.github/workflows/flash_attention.yml
+++ b/.github/workflows/flash_attention.yml
@@ -60,7 +60,7 @@ jobs:
pushd fa4
python setup.py install
pip install -e flash_attn/cute/
-
+
echo '
B200' >> /tmp/workspace/fa4_output.txt
nvidia-smi -q -d POWER | grep 'Current Power Limit' | head -1 | cut -d : -f 2 >> /tmp/workspace/fa4_output.txt
echo '
' >> /tmp/workspace/fa4_output.txt
diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
index ef9cdc1c..f292c5c5 100644
--- a/.github/workflows/vllm-benchmark.yml
+++ b/.github/workflows/vllm-benchmark.yml
@@ -327,9 +327,17 @@ jobs:
"${DOCKER_IMAGE}"
)
if [[ "${DEVICE_NAME}" == "cuda" ]]; then
- docker exec -t "${container_name}" bash -c "pip install torchao==0.14.1 fbgemm-gpu-genai==1.4.1"
+ docker exec -t "${container_name}" bash -c "
+ pip install torchao==0.14.1 fbgemm-gpu-genai==1.4.1
+
+ # Quick mitigation for https://github.com/vllm-project/vllm/issues/32373:
+ # drop the CUDA compat entry from the loader config, then refresh the ldconfig cache
+ ldconfig
+ "
fi
- docker exec -t "${container_name}" bash -c "cd vllm-benchmarks/vllm && bash .buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh"
+ docker exec -t "${container_name}" bash -c "
+ cd vllm-benchmarks/vllm && bash .buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
+ "
- name: Authenticate with AWS
# AWS CUDA runners already have access to the bucket via its runner IAM role