Patch summary: optional image/CUDA overrides for the docker-build action.

  * .github/actions/docker-build/action.yml: adds the optional inputs base_image_tag,
    runtime_image_tag, cuda_version and torch_backend. Non-empty values are forwarded to
    container/build.sh as --base-image-tag / --build-arg options. They are collected in a
    bash array and expanded quoted, so each value reaches build.sh as exactly one argument.
  * .github/workflows/container-validation-backends.yml: the vllm build step passes
    arm64-specific values for the four inputs when matrix.platform.arch is 'arm64' and
    empty strings otherwise; the file also gains a trailing newline.
  * container/Dockerfile.vllm: creates the libmlx5.so -> libmlx5.so.1 symlink under
    /usr/lib/aarch64-linux-gnu, only when libmlx5.so.1 is present.
  * container/deps/vllm/install_vllm.sh: arm64 with TORCH_BACKEND=cu129 takes the same
    PyPI install path as amd64 when VLLM_REF starts with 'v'.

NOTE(review): indentation and blank context lines were reconstructed, and the blob hashes
on the index lines were carried over unchanged; verify against the target tree before applying.

diff --git a/.github/actions/docker-build/action.yml b/.github/actions/docker-build/action.yml
index 87e7b243271e..e9d3d68351ac 100644
--- a/.github/actions/docker-build/action.yml
+++ b/.github/actions/docker-build/action.yml
@@ -34,6 +34,18 @@ inputs:
   aws_secret_access_key:
     description: 'AWS Secret Access Key'
     required: false
+  base_image_tag:
+    description: 'Optional override for base image tag passed to build.sh'
+    required: false
+  runtime_image_tag:
+    description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
+    required: false
+  cuda_version:
+    description: 'Optional override for CUDA_VERSION build-arg'
+    required: false
+  torch_backend:
+    description: 'Optional override for TORCH_BACKEND build-arg (e.g., cu129)'
+    required: false
 
 outputs:
   image_tag:
@@ -81,6 +93,21 @@ runs:
         echo "BUILD_START_TIME=${BUILD_START_TIME}" >> $GITHUB_ENV
         echo "image_tag=$IMAGE_TAG" >> $GITHUB_OUTPUT
 
+        # Collect optional workflow overrides in an array so each value stays a single argument
+        EXTRA_ARGS=()
+        if [ -n "${{ inputs.base_image_tag }}" ]; then
+          EXTRA_ARGS+=(--base-image-tag "${{ inputs.base_image_tag }}")
+        fi
+        if [ -n "${{ inputs.runtime_image_tag }}" ]; then
+          EXTRA_ARGS+=(--build-arg "RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }}")
+        fi
+        if [ -n "${{ inputs.cuda_version }}" ]; then
+          EXTRA_ARGS+=(--build-arg "CUDA_VERSION=${{ inputs.cuda_version }}")
+        fi
+        if [ -n "${{ inputs.torch_backend }}" ]; then
+          EXTRA_ARGS+=(--build-arg "TORCH_BACKEND=${{ inputs.torch_backend }}")
+        fi
+
         ./container/build.sh --tag "$IMAGE_TAG" \
           --target ${{ inputs.target }} \
           --vllm-max-jobs 10 \
@@ -88,7 +115,7 @@ runs:
           --platform ${{ inputs.platform }} \
           --use-sccache \
           --sccache-bucket "$SCCACHE_S3_BUCKET" \
-          --sccache-region "$AWS_DEFAULT_REGION"
+          --sccache-region "$AWS_DEFAULT_REGION" "${EXTRA_ARGS[@]}"
 
         BUILD_END_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
         echo "🕐 Build ended at: ${BUILD_END_TIME}"
diff --git a/.github/workflows/container-validation-backends.yml b/.github/workflows/container-validation-backends.yml
index ec0b41d9c764..1deef8e5ac1e 100644
--- a/.github/workflows/container-validation-backends.yml
+++ b/.github/workflows/container-validation-backends.yml
@@ -58,6 +58,10 @@ jobs:
           framework: vllm
           target: runtime
           platform: 'linux/${{ matrix.platform.arch }}'
+          base_image_tag: ${{ matrix.platform.arch == 'arm64' && '25.06-cuda12.9-devel-ubuntu24.04' || '' }}
+          runtime_image_tag: ${{ matrix.platform.arch == 'arm64' && '12.9.0-runtime-ubuntu24.04' || '' }}
+          cuda_version: ${{ matrix.platform.arch == 'arm64' && '129' || '' }}
+          torch_backend: ${{ matrix.platform.arch == 'arm64' && 'cu129' || '' }}
           ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
           ci_token: ${{ secrets.CI_TOKEN }}
           aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
@@ -251,4 +255,4 @@ jobs:
           CONTAINER_INDEX: ${{ secrets.CONTAINER_INDEX }}
         run: |
           # Upload complete workflow metrics including container metrics
-          python3 .github/workflows/upload_complete_workflow_metrics.py
\ No newline at end of file
+          python3 .github/workflows/upload_complete_workflow_metrics.py
diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm
index bb0416ac07db..d2337637953e 100644
--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -88,6 +88,10 @@ RUN apt-get update -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
+# The 24.04 rdma-core packaging may not ship the unversioned libmlx5.so, and CMake fails when it looks
+# for that generic dev name, so symlink libmlx5.so -> libmlx5.so.1 on arm64 when libmlx5.so.1 exists.
+RUN if [ -e /usr/lib/aarch64-linux-gnu/libmlx5.so.1 ]; then ln -sf /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/libmlx5.so; fi
+
 ### VIRTUAL ENVIRONMENT SETUP ###
 
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
diff --git a/container/deps/vllm/install_vllm.sh b/container/deps/vllm/install_vllm.sh
index 195981194991..0ebbb58823fb 100755
--- a/container/deps/vllm/install_vllm.sh
+++ b/container/deps/vllm/install_vllm.sh
@@ -136,9 +136,9 @@ git checkout $VLLM_REF
 
 echo "\n=== Installing vLLM & FlashInfer ==="
 
-if [[ $VLLM_REF =~ ^v ]] && [ "$ARCH" = "amd64" ]; then
-    # VLLM_REF starts with 'v' and amd64 - use pip install with version tag
-    echo "Installing vLLM $VLLM_REF from PyPI..."
+if [[ $VLLM_REF =~ ^v ]] && { [ "$ARCH" = "amd64" ] || { [ "$ARCH" = "arm64" ] && [ "$TORCH_BACKEND" = "cu129" ]; }; }; then
+    # VLLM_REF starts with 'v' and either amd64, or arm64 with cu129 backend - use PyPI install
+    echo "Installing vLLM $VLLM_REF from PyPI... (ARCH=$ARCH, TORCH_BACKEND=$TORCH_BACKEND)"
     uv pip install vllm[flashinfer]==$VLLM_REF --torch-backend=$TORCH_BACKEND