pytorch · huydhn · Jan 16, 2026 · Jan 15, 2026
diff --git a/.github/workflows/flash_attention.yml b/.github/workflows/flash_attention.yml
@@ -60,7 +60,7 @@ jobs:
             pushd fa4
             python setup.py install
             pip install -e flash_attn/cute/
-            
+
             echo '<h1>B200' >> /tmp/workspace/fa4_output.txt
             nvidia-smi -q -d POWER | grep 'Current Power Limit' | head -1 | cut  -d : -f 2 >> /tmp/workspace/fa4_output.txt
             echo '</h1>' >> /tmp/workspace/fa4_output.txt

diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
@@ -327,9 +327,20 @@ jobs:
             "${DOCKER_IMAGE}"
           )
           if [[ "${DEVICE_NAME}" == "cuda" ]]; then
-            docker exec -t "${container_name}" bash -c "pip install torchao==0.14.1 fbgemm-gpu-genai==1.4.1"
+            docker exec -t "${container_name}" bash -c "
+              # Abort on the first failing command so that a failed pip install is
+              # reported through the exit status, as the previous one-liner did
+              set -e
+              pip install torchao==0.14.1 fbgemm-gpu-genai==1.4.1
+
+              # A quick mitigation for https://github.com/vllm-project/vllm/issues/32373
+              rm /etc/ld.so.conf.d/00-cuda-compat.conf || true
+              ldconfig
+            "
           fi
-          docker exec -t "${container_name}" bash -c "cd vllm-benchmarks/vllm && bash .buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh"
+          docker exec -t "${container_name}" bash -c "
+            cd vllm-benchmarks/vllm && bash .buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
+          "
 
       - name: Authenticate with AWS
         # AWS CUDA runners already have access to the bucket via its runner IAM role