diff --git a/.github/workflows/flash_attention.yml b/.github/workflows/flash_attention.yml index c6fc6635..201aaff1 100644 --- a/.github/workflows/flash_attention.yml +++ b/.github/workflows/flash_attention.yml @@ -60,7 +60,7 @@ jobs: pushd fa4 python setup.py install pip install -e flash_attn/cute/ - + echo '

B200' >> /tmp/workspace/fa4_output.txt nvidia-smi -q -d POWER | grep 'Current Power Limit' | head -1 | cut -d : -f 2 >> /tmp/workspace/fa4_output.txt echo '

' >> /tmp/workspace/fa4_output.txt diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index ef9cdc1c..f292c5c5 100644 --- a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -327,9 +327,17 @@ jobs: "${DOCKER_IMAGE}" ) if [[ "${DEVICE_NAME}" == "cuda" ]]; then - docker exec -t "${container_name}" bash -c "pip install torchao==0.14.1 fbgemm-gpu-genai==1.4.1" + docker exec -t "${container_name}" bash -ec " + pip install torchao==0.14.1 fbgemm-gpu-genai==1.4.1 + + # A quick mitigation for https://github.com/vllm-project/vllm/issues/32373 + rm /etc/ld.so.conf.d/00-cuda-compat.conf || true + ldconfig + " fi - docker exec -t "${container_name}" bash -c "cd vllm-benchmarks/vllm && bash .buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh" + docker exec -t "${container_name}" bash -c " + cd vllm-benchmarks/vllm && bash .buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh + " - name: Authenticate with AWS # AWS CUDA runners already have access to the bucket via its runner IAM role