From 4e4a0c68d8b898a1bcc64648fc39c2d02dd532ae Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Thu, 11 Dec 2025 09:36:18 +0000 Subject: [PATCH 01/32] add Dockerfile.rocm Signed-off-by: tjtanaa --- docker/Dockerfile.rocm | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 docker/Dockerfile.rocm diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm new file mode 100644 index 00000000000..6e774135e26 --- /dev/null +++ b/docker/Dockerfile.rocm @@ -0,0 +1,41 @@ +ARG ROCM_BASE_IMAGE=rocm/vllm-dev +ARG ROCM_BASE_TAG=nightly_main_20251005 +FROM ${ROCM_BASE_IMAGE}:${ROCM_BASE_TAG} + +ARG APP_DIR=/workspace/vllm-omni +ARG VLLM_VERSION=v0.11.0 +ARG PYTORCH_ROCM_ARCH="gfx942;gfx950" + +WORKDIR ${APP_DIR} + +# Step 1: Setup - Install system dependencies +RUN apt-get update && \ + apt-get install -y ffmpeg && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Step 2: Reinstall vllm from source +RUN cd ../ && python3 -m pip uninstall -y vllm && \ + git clone https://github.com/vllm-project/vllm.git && \ + cd vllm && \ + git checkout ${VLLM_VERSION} && \ + python3 -c "import setuptools_scm; print(setuptools_scm.get_version())" && \ + PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} python3 setup.py develop && \ + cd / && \ + rm -rf vllm/.git + +# Step 3: Copy vllm-omni code and install without uv +COPY . 
${APP_DIR} +RUN python3 -m pip install --no-cache-dir ".[dev]" + +# Create python symlink +RUN ln -sf /usr/bin/python3 /usr/bin/python + +# Step 4: Set environment variables for ROCm optimization +ENV MIOPEN_FIND_MODE=FAST +ENV VLLM_ROCM_USE_AITER=1 +ENV VLLM_ROCM_USE_AITER_MHA=1 +ENV VLLM_ROCM_USE_AITER_LINEAR=0 +ENV VLLM_ROCM_USE_AITER_RMSNORM=0 + +ENTRYPOINT [] \ No newline at end of file From 2b7ff410af666cbdf001df5f71a3e58ea1e92f1f Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Thu, 11 Dec 2025 10:05:08 +0000 Subject: [PATCH 02/32] add dockerfile build instruction Signed-off-by: tjtanaa --- docs/getting_started/installation/gpu.md | 9 +++-- .../installation/gpu/rocm.inc.md | 38 +++++++++++++++++-- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/docs/getting_started/installation/gpu.md b/docs/getting_started/installation/gpu.md index 5956ed102de..03758fd9354 100644 --- a/docs/getting_started/installation/gpu.md +++ b/docs/getting_started/installation/gpu.md @@ -49,11 +49,14 @@ vLLM-Omni is a Python library that supports the following GPU variants. The libr ## Set up using Docker -### Build wheel from source -=== "NVIDIA CUDA" +### Build your own docker image + +=== "AMD ROCm" + + --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:build-docker" - --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:build-wheel-from-source-in-docker" +### Build wheel from source === "AMD ROCm" diff --git a/docs/getting_started/installation/gpu/rocm.inc.md b/docs/getting_started/installation/gpu/rocm.inc.md index 3b970267a28..c925dece380 100644 --- a/docs/getting_started/installation/gpu/rocm.inc.md +++ b/docs/getting_started/installation/gpu/rocm.inc.md @@ -69,9 +69,6 @@ python -c "import setuptools_scm; print(setuptools_scm.get_version())" PYTORCH_ROCM_ARCH=gfx942 python3 setup.py develop ``` -!!! note - vLLM release wheels based on the branch with prefix `releases/`, not from the tag as vLLM may cherry pick bugfixes after cutting a branch. 
- #### Installation of vLLM-Omni @@ -110,6 +107,41 @@ export VLLM_ROCM_USE_AITER_RMSNORM=0 # --8<-- [end:build-wheel-from-source-in-docker] +# --8<-- [start:build-docker] + +#### Build docker image + +```bash +DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm -t vllm-omni-rocm . +``` + +If you want to specify which GPU Arch to build for to cutdown build time: + +```bash +DOCKER_BUILDKIT=1 docker build \ + -f docker/Dockerfile.rocm \ + --build-arg PYTORCH_ROCM_ARCH="gfx942;gfx950" \ + -t vllm-omni-rocm . +``` + +#### Launch the docker image + +``` +docker run -it \ +--network=host \ +--group-add=video \ +--ipc=host \ +--cap-add=SYS_PTRACE \ +--security-opt seccomp=unconfined \ +--device /dev/kfd \ +--device /dev/dri \ +-v :/app/model \ +vllm-omni-rocm \ +bash +``` + +# --8<-- [end:build-docker] + # --8<-- [start:pre-built-images] # --8<-- [end:pre-built-images] From b03d282a85e0cf6f1a1428333654c2edae349ca9 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Thu, 11 Dec 2025 14:50:00 +0000 Subject: [PATCH 03/32] add preliminary CI files Signed-off-by: tjtanaa --- .buildkite/bootstrap-omni-amd.sh | 677 ++++++++++++++++++ .../scripts/hardware_ci/run-amd-test.sh | 113 +++ .buildkite/test-amd.yaml | 34 + 3 files changed, 824 insertions(+) create mode 100755 .buildkite/bootstrap-omni-amd.sh create mode 100755 .buildkite/scripts/hardware_ci/run-amd-test.sh create mode 100644 .buildkite/test-amd.yaml diff --git a/.buildkite/bootstrap-omni-amd.sh b/.buildkite/bootstrap-omni-amd.sh new file mode 100755 index 00000000000..724050cd477 --- /dev/null +++ b/.buildkite/bootstrap-omni-amd.sh @@ -0,0 +1,677 @@ +#!/bin/bash +# vLLM-Omni AMD CI Bootstrap +# Intelligent CI orchestration following vLLM's ci-infra approach +# +# Features: +# - Smart change detection (docs-only skip, critical files) +# - Pure bash YAML parsing +# - Test filtering by source_file_dependencies and mirror_hardwares +# - GitHub PR label support (ready-run-all-tests, ci-no-fail-fast) +# - Dynamic Buildkite 
pipeline generation + +set -euo pipefail + +#============================================================================== +# SECTION 1: INITIALIZATION & ENVIRONMENT DETECTION +#============================================================================== + +# Enable debugging if requested +DEBUG="${VLLM_CI_DEBUG:-0}" +[[ "$DEBUG" == "1" ]] && set -x + +echo "=== vLLM-Omni AMD CI Bootstrap ===" +echo "Timestamp: $(date -u +"%Y-%m-%d %H:%M:%S UTC")" +echo "Branch: ${BUILDKITE_BRANCH:-unknown}" +echo "Commit: ${BUILDKITE_COMMIT:-unknown}" +echo "Pull Request: ${BUILDKITE_PULL_REQUEST:-none}" +echo "" + +# Validate environment +if [ ! -d ".buildkite" ]; then + echo "Error: .buildkite directory not found" + echo "Please run this script from the repository root" + exit 1 +fi + +if [ ! -f ".buildkite/test-amd.yaml" ]; then + echo "Error: .buildkite/test-amd.yaml not found" + exit 1 +fi + +# Validate git repository +if ! git rev-parse --git-dir > /dev/null 2>&1; then + echo "Error: Not a git repository" + exit 1 +fi + +# Determine base branch for comparison +if [[ "${BUILDKITE_PULL_REQUEST:-false}" != "false" ]]; then + BASE_BRANCH="${BUILDKITE_PULL_REQUEST_BASE_BRANCH:-main}" +else + BASE_BRANCH="main" +fi + +echo "Base branch for comparison: ${BASE_BRANCH}" +echo "" + +#============================================================================== +# SECTION 2: GITHUB LABEL CHECKING +#============================================================================== + +# Function: Check GitHub PR labels +check_github_labels() { + local pr_number="$1" + + # Extract owner/repo from git URL + local repo_full_name=$(git remote get-url origin 2>/dev/null | sed -E 's/.*github\.com[:/]([^/]+\/[^/]+)(\.git)?$/\1/' || echo "") + + if [[ -z "$repo_full_name" ]]; then + echo "Warning: Could not determine GitHub repository" + return 1 + fi + + echo "--- Checking GitHub PR labels" + echo "Repository: ${repo_full_name}" + echo "PR Number: ${pr_number}" + + # Try to fetch labels via 
GitHub API (no auth needed for public repos) + if command -v curl >/dev/null 2>&1; then + local api_url="https://api.github.com/repos/${repo_full_name}/pulls/${pr_number}" + local response=$(curl -s -f "${api_url}" 2>/dev/null || echo "") + + if [[ -n "$response" ]]; then + # Extract label names + local labels=$(echo "$response" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 | tr '\n' ',' || echo "") + + # Check for specific labels + if [[ "$labels" == *"ready-run-all-tests"* ]]; then + RUN_ALL_TESTS=1 + echo "✓ Found label: ready-run-all-tests" + fi + + if [[ "$labels" == *"ci-no-fail-fast"* ]]; then + NO_FAIL_FAST=1 + echo "✓ Found label: ci-no-fail-fast" + fi + + [[ "$RUN_ALL_TESTS" == "0" ]] && echo " No run-all-tests label" + [[ "$NO_FAIL_FAST" == "0" ]] && echo " No fail-fast override label" + return 0 + fi + fi + + echo "Warning: Could not fetch GitHub labels (API unavailable or request failed)" + return 1 +} + +# Initialize flags +RUN_ALL_TESTS=0 +NO_FAIL_FAST=0 + +# Check labels if this is a PR +if [[ "${BUILDKITE_PULL_REQUEST:-false}" != "false" ]] && [[ "${BUILDKITE_PULL_REQUEST}" != "" ]]; then + check_github_labels "${BUILDKITE_PULL_REQUEST}" || echo "Continuing without label information" + echo "" +fi + +#============================================================================== +# SECTION 3: CHANGE DETECTION & ANALYSIS +#============================================================================== + +# Function: Detect if only docs changed +is_docs_only_change() { + local changed_files="$1" + + # Docs-related patterns + local docs_patterns=( + "^docs/" + "^README" + "\\.md$" + "\\.rst$" + "^LICENSE" + "^CONTRIBUTING" + ) + + local has_non_docs=0 + + while IFS= read -r file; do + [[ -z "$file" ]] && continue + + local is_docs=0 + for pattern in "${docs_patterns[@]}"; do + if [[ "$file" =~ $pattern ]]; then + is_docs=1 + break + fi + done + + if [[ "$is_docs" == "0" ]]; then + # Found non-docs file + has_non_docs=1 + break + fi + done <<< 
"$changed_files" + + # Return 0 (success) if all files are docs, 1 otherwise + [[ "$has_non_docs" == "0" ]] +} + +# Function: Detect critical file changes +has_critical_file_changes() { + local changed_files="$1" + + # Critical patterns that trigger full test run + local critical_patterns=( + "^docker/Dockerfile" + "^requirements.*\\.txt$" + "^setup\\.py$" + "^pyproject\\.toml$" + "^\\.buildkite/bootstrap-omni-amd\\.sh$" + "^\\.buildkite/test-amd\\.yaml$" + "^\\.buildkite/scripts/hardware_ci/run-amd-test\\.sh$" + ) + + while IFS= read -r file; do + [[ -z "$file" ]] && continue + + for pattern in "${critical_patterns[@]}"; do + if [[ "$file" =~ $pattern ]]; then + echo " Critical file changed: $file" + return 0 + fi + done + done <<< "$changed_files" + + return 1 +} + +echo "--- Analyzing changed files" + +# Get list of changed files +CHANGED_FILES="" +if git rev-parse "origin/${BASE_BRANCH}" >/dev/null 2>&1; then + # Fetch latest base branch + echo "Fetching origin/${BASE_BRANCH}..." + git fetch origin "${BASE_BRANCH}" >/dev/null 2>&1 || true + + # Get changed files between base and current commit + CHANGED_FILES=$(git diff --name-only "origin/${BASE_BRANCH}...${BUILDKITE_COMMIT}" 2>/dev/null || \ + git diff --name-only "origin/${BASE_BRANCH}" "${BUILDKITE_COMMIT}" 2>/dev/null || \ + echo "") +else + echo "Warning: Could not find base branch ${BASE_BRANCH}" + echo "Will run all tests as a safety measure" + RUN_ALL_TESTS=1 +fi + +# Count changed files +CHANGED_FILE_COUNT=$(echo "$CHANGED_FILES" | grep -c . || echo "0") +echo "Changed files: ${CHANGED_FILE_COUNT}" + +# Debug: Show changed files if in debug mode +if [[ "$DEBUG" == "1" ]] && [[ -n "$CHANGED_FILES" ]]; then + echo "Changed files list:" + echo "$CHANGED_FILES" | head -20 + [[ "$CHANGED_FILE_COUNT" -gt 20 ]] && echo "... 
(${CHANGED_FILE_COUNT} total files)" + echo "" +fi + +# Check for docs-only changes (early exit optimization) +if [[ "$CHANGED_FILE_COUNT" -gt 0 ]] && is_docs_only_change "$CHANGED_FILES"; then + echo "" + echo "=== Documentation-only changes detected ===" + echo "Skipping CI tests to save resources" + echo "" + + # Generate minimal pipeline + cat > .buildkite/pipeline.yaml << 'EOF' +steps: + - label: ":memo: Docs-only change" + command: echo "Only documentation changed, skipping tests" + agents: + queue: cpu_queue_premerge +EOF + + echo "--- Generated minimal pipeline:" + cat .buildkite/pipeline.yaml + echo "" + + echo "--- Uploading pipeline to Buildkite" + buildkite-agent pipeline upload .buildkite/pipeline.yaml + + echo "=== Bootstrap Complete (docs-only) ===" + exit 0 +fi + +# Check for critical file changes +if has_critical_file_changes "$CHANGED_FILES"; then + echo " → Critical files detected: Will run ALL tests" + RUN_ALL_TESTS=1 +fi + +echo "" + +#============================================================================== +# SECTION 4: YAML PARSING (Pure Bash) +#============================================================================== + +echo "--- Parsing test-amd.yaml" + +# Parse test-amd.yaml into structured variables +# Variables will be named: STEP__ +# Arrays/lists use || as delimiter + +TOTAL_STEPS=0 +declare -A STEP_DATA + +parse_test_yaml() { + local yaml_file="$1" + + local step_num=0 + local in_step=0 + local current_section="" + local list_indent=0 + + while IFS= read -r raw_line; do + # Handle line without removing all whitespace (need to detect indentation) + local line="$raw_line" + + # Skip comments and empty lines + [[ "$line" =~ ^[[:space:]]*# ]] && continue + [[ -z "${line// /}" ]] && continue + + # Detect indentation level + local indent=$(echo "$line" | sed -E 's/^([[:space:]]*).*/\1/' | wc -c) + indent=$((indent - 1)) # wc -c counts \n + + # Detect new step (starts with "- label:") + if [[ "$line" =~ 
^[[:space:]]*-[[:space:]]+label:[[:space:]]*(.+)$ ]]; then + step_num=$((step_num + 1)) + in_step=1 + local label="${BASH_REMATCH[1]}" + label=$(echo "$label" | sed 's/^["'"'"']//' | sed 's/["'"'"']$//') + + eval "STEP_${step_num}_LABEL=\"\$label\"" + eval "STEP_${step_num}_KEY=\"step-${step_num}\"" + + echo " Step ${step_num}: ${label}" + current_section="" + continue + fi + + [[ "$in_step" == "0" ]] && continue + + # Parse key-value pairs at step level + if [[ "$line" =~ ^[[:space:]]+([a-z_]+):[[:space:]]*(.*)$ ]]; then + local key="${BASH_REMATCH[1]}" + local value="${BASH_REMATCH[2]}" + + # Clean value (remove quotes, brackets) + value=$(echo "$value" | sed 's/^["'"'"'\[]//' | sed 's/["'"'"'\]]*$//') + + case "$key" in + key) + eval "STEP_${step_num}_KEY=\"\$value\"" + ;; + mirror_hardwares) + # Parse array: [amdexperimental, amdproduction] + value=$(echo "$value" | tr ',' ' ' | xargs) + eval "STEP_${step_num}_MIRROR_HARDWARES=\"\$value\"" + ;; + agent_pool) + eval "STEP_${step_num}_AGENT_POOL=\"\$value\"" + ;; + timeout_in_minutes) + eval "STEP_${step_num}_TIMEOUT=\"\$value\"" + ;; + fast_check) + eval "STEP_${step_num}_FAST_CHECK=\"\$value\"" + ;; + working_dir) + eval "STEP_${step_num}_WORKING_DIR=\"\$value\"" + ;; + queue) + # Part of agents section + eval "STEP_${step_num}_QUEUE=\"\$value\"" + ;; + commands) + current_section="commands" + eval "STEP_${step_num}_COMMANDS=\"\"" + list_indent=$indent + ;; + source_file_dependencies) + current_section="dependencies" + eval "STEP_${step_num}_DEPENDENCIES=\"\"" + list_indent=$indent + ;; + agents) + current_section="agents" + ;; + *) + # Check if we left a list section + if [[ "$indent" -le "$list_indent" ]] && [[ -n "$current_section" ]]; then + current_section="" + fi + ;; + esac + # Parse list items (- item) + elif [[ "$line" =~ ^[[:space:]]*-[[:space:]]+(.+)$ ]]; then + local item="${BASH_REMATCH[1]}" + item=$(echo "$item" | sed 's/^["'"'"']//' | sed 's/["'"'"']$//') + + if [[ "$current_section" == 
"commands" ]]; then + local current_cmds + eval "current_cmds=\"\$STEP_${step_num}_COMMANDS\"" + if [[ -n "$current_cmds" ]]; then + eval "STEP_${step_num}_COMMANDS=\"\${current_cmds}||\$item\"" + else + eval "STEP_${step_num}_COMMANDS=\"\$item\"" + fi + elif [[ "$current_section" == "dependencies" ]]; then + local current_deps + eval "current_deps=\"\$STEP_${step_num}_DEPENDENCIES\"" + if [[ -n "$current_deps" ]]; then + eval "STEP_${step_num}_DEPENDENCIES=\"\${current_deps}||\$item\"" + else + eval "STEP_${step_num}_DEPENDENCIES=\"\$item\"" + fi + fi + fi + done < "$yaml_file" + + TOTAL_STEPS=$step_num +} + +parse_test_yaml ".buildkite/test-amd.yaml" + +echo "Parsed ${TOTAL_STEPS} steps from test-amd.yaml" + +# Validate parsing +if [[ "$TOTAL_STEPS" == "0" ]]; then + echo "Error: No steps found in test-amd.yaml" + exit 1 +fi + +echo "" + +#============================================================================== +# SECTION 5: TEST FILTERING & SELECTION +#============================================================================== + +# Function: Check if step should run based on file dependencies +should_run_step() { + local step_num="$1" + + # If RUN_ALL_TESTS is set, always run + if [[ "$RUN_ALL_TESTS" == "1" ]]; then + return 0 + fi + + # Get step dependencies + local deps_var="STEP_${step_num}_DEPENDENCIES" + local dependencies="${!deps_var:-}" + + # If no dependencies specified, always run (catch-all test) + if [[ -z "$dependencies" ]]; then + return 0 + fi + + # If no files changed, don't run + if [[ -z "$CHANGED_FILES" ]] || [[ "$CHANGED_FILE_COUNT" == "0" ]]; then + return 1 + fi + + # Check if any changed file matches dependencies + # Dependencies use prefix matching (e.g., "vllm_omni/" matches "vllm_omni/diffusion/model.py") + local IFS='||' + local deps_array=($dependencies) + + for dep_pattern in "${deps_array[@]}"; do + # Remove trailing slashes for consistency + dep_pattern="${dep_pattern%/}" + + while IFS= read -r changed_file; do + [[ -z 
"$changed_file" ]] && continue + + # Check if changed file matches dependency pattern + # Support both prefix matching and exact file matching + if [[ "$changed_file" == "${dep_pattern}"* ]] || [[ "$changed_file" == "$dep_pattern" ]]; then + if [[ "$DEBUG" == "1" ]]; then + echo " Match: ${changed_file} ↔ ${dep_pattern}" + fi + return 0 + fi + done <<< "$CHANGED_FILES" + done + + return 1 +} + +# Function: Check if step matches hardware filter +matches_hardware() { + local step_num="$1" + local target_hardware="${2:-amdexperimental}" + + # Get mirror_hardwares for this step + local hw_var="STEP_${step_num}_MIRROR_HARDWARES" + local hardwares="${!hw_var:-}" + + # If no hardware specified, it's a non-AMD step (CPU/build steps) + # These should always be included + if [[ -z "$hardwares" ]]; then + return 0 + fi + + # Check if target hardware is in the list + if [[ "$hardwares" == *"$target_hardware"* ]]; then + return 0 + fi + + return 1 +} + +echo "--- Filtering tests" +echo "Filter criteria:" +echo " Target hardware: amdexperimental" +echo " RUN_ALL_TESTS: ${RUN_ALL_TESTS}" +echo " Changed files: ${CHANGED_FILE_COUNT}" +echo "" + +# Build list of steps to include +INCLUDED_STEPS=() + +for ((i=1; i<=TOTAL_STEPS; i++)); do + local label_var="STEP_${i}_LABEL" + local label="${!label_var:-Step $i}" + + # Check hardware match + if ! 
matches_hardware "$i" "amdexperimental"; then + echo " Step ${i} (${label}): ✗ Wrong hardware - SKIP" + continue + fi + + # Check file dependencies + if should_run_step "$i"; then + echo " Step ${i} (${label}): ✓ INCLUDE" + INCLUDED_STEPS+=("$i") + else + echo " Step ${i} (${label}): ✗ No matching changes - SKIP" + fi +done + +echo "" +echo "Selected ${#INCLUDED_STEPS[@]} of ${TOTAL_STEPS} steps" + +# Ensure at least build step (step 1) is included +if [[ "${#INCLUDED_STEPS[@]}" == "0" ]]; then + echo "Warning: No tests selected, including build step as fallback" + INCLUDED_STEPS=(1) +fi + +echo "" + +#============================================================================== +# SECTION 6: PIPELINE GENERATION +#============================================================================== + +echo "--- Generating Buildkite pipeline" + +# Start pipeline file +cat > .buildkite/pipeline.yaml << 'PIPELINE_HEADER' +# Auto-generated by bootstrap-omni-amd.sh +# DO NOT EDIT MANUALLY - Edit .buildkite/test-amd.yaml instead +# +# Generated: +PIPELINE_HEADER + +echo "# $(date -u)" >> .buildkite/pipeline.yaml +echo "" >> .buildkite/pipeline.yaml +echo "steps:" >> .buildkite/pipeline.yaml + +# Generate steps +for step_num in "${INCLUDED_STEPS[@]}"; do + # Get step data + local label_var="STEP_${step_num}_LABEL" + local label="${!label_var}" + + local key_var="STEP_${step_num}_KEY" + local key="${!key_var:-step-${step_num}}" + + local queue_var="STEP_${step_num}_QUEUE" + local queue="${!queue_var:-}" + + local agent_pool_var="STEP_${step_num}_AGENT_POOL" + local agent_pool="${!agent_pool_var:-}" + + local timeout_var="STEP_${step_num}_TIMEOUT" + local timeout="${!timeout_var:-10}" + + local commands_var="STEP_${step_num}_COMMANDS" + local commands="${!commands_var:-}" + + local working_dir_var="STEP_${step_num}_WORKING_DIR" + local working_dir="${!working_dir_var:-}" + + # Determine queue + local final_queue="" + if [[ -n "$queue" ]]; then + final_queue="$queue" + elif [[ -n 
"$agent_pool" ]]; then + # Map agent_pool to queue name + case "$agent_pool" in + mi325_1) final_queue="amd_mi325_1" ;; + mi325_2) final_queue="amd_mi325_2" ;; + mi325_4) final_queue="amd_mi325_4" ;; + mi325_8) final_queue="amd_mi325_8" ;; + *) final_queue="amd_${agent_pool}" ;; + esac + else + # Default queue for steps without specification + final_queue="cpu_queue_premerge" + fi + + # Generate step YAML + cat >> .buildkite/pipeline.yaml << STEP_START + + - label: "${label}" + key: "${key}" + agents: + queue: "${final_queue}" + cluster: "CI" +STEP_START + + # Add commands + if [[ -n "$commands" ]]; then + # Check if this is an AMD GPU test (needs run-amd-test.sh wrapper) + if [[ "$final_queue" == amd_* ]]; then + # Build command string for AMD GPU execution + local cmd_string="" + local IFS='||' + local cmd_array=($commands) + + # Add ROCm check prefix + cmd_string="(command rocm-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1" + + # Add working directory if specified + if [[ -n "$working_dir" ]]; then + cmd_string="${cmd_string} && cd ${working_dir}" + fi + + # Join commands with && + for cmd in "${cmd_array[@]}"; do + cmd_string="${cmd_string} && ${cmd}" + done + + # Wrap in run-amd-test.sh + echo " command: bash .buildkite/scripts/hardware_ci/run-amd-test.sh \"${cmd_string}\"" >> .buildkite/pipeline.yaml + else + # CPU or build step - direct commands + echo " commands:" >> .buildkite/pipeline.yaml + local IFS='||' + local cmd_array=($commands) + for cmd in "${cmd_array[@]}"; do + echo " - \"${cmd}\"" >> .buildkite/pipeline.yaml + done + fi + else + # No commands specified + echo " command: echo \"No commands specified for this step\"" >> .buildkite/pipeline.yaml + fi + + # Add timeout + echo " timeout_in_minutes: ${timeout}" >> .buildkite/pipeline.yaml + + # Add retry for AMD GPU tests + if [[ "$final_queue" == amd_* ]]; then + cat >> .buildkite/pipeline.yaml << 'RETRY_BLOCK' + retry: + automatic: + - exit_status: "*" + limit: 1 +RETRY_BLOCK + fi + + 
# Add environment variables for AMD GPU tests + if [[ "$final_queue" == amd_* ]]; then + cat >> .buildkite/pipeline.yaml << 'ENV_BLOCK' + env: + HF_HOME: "/root/.cache/huggingface" +ENV_BLOCK + fi + + # Add depends_on for non-first steps (depend on build step) + if [[ "$step_num" != "${INCLUDED_STEPS[0]}" ]]; then + local first_key_var="STEP_${INCLUDED_STEPS[0]}_KEY" + local first_key="${!first_key_var:-step-${INCLUDED_STEPS[0]}}" + echo " depends_on: \"${first_key}\"" >> .buildkite/pipeline.yaml + fi +done + +# Validate generated pipeline +if ! grep -q "^steps:" .buildkite/pipeline.yaml; then + echo "Error: Generated pipeline is invalid (missing 'steps:' section)" + exit 1 +fi + +echo "Pipeline generated successfully" +echo "" + +# Display generated pipeline +echo "--- Generated Pipeline:" +cat .buildkite/pipeline.yaml +echo "" + +# Upload pipeline to Buildkite +echo "--- Uploading pipeline to Buildkite" +buildkite-agent pipeline upload .buildkite/pipeline.yaml + +echo "" +echo "=== Bootstrap Complete ===" +echo "Configuration:" +echo " Total steps defined: ${TOTAL_STEPS}" +echo " Steps selected: ${#INCLUDED_STEPS[@]}" +echo " RUN_ALL_TESTS: ${RUN_ALL_TESTS}" +echo " NO_FAIL_FAST: ${NO_FAIL_FAST}" +echo " Changed files: ${CHANGED_FILE_COUNT}" +echo "" +echo "Pipeline uploaded successfully!" diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh new file mode 100755 index 00000000000..faf51234619 --- /dev/null +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +# This script runs tests inside the ROCm docker container for vLLM-Omni. +# Adapted from vLLM's run-amd-test.sh for vllm-omni's simpler use case. + +set -o pipefail + +# Export Python path +export PYTHONPATH=".." 
+ +# Print ROCm version +echo "--- Confirming Clean Initial State" +while true; do + sleep 3 + if grep -q clean /opt/amdgpu/etc/gpu_state; then + echo "GPUs state is \"clean\"" + break + fi +done + +echo "--- ROCm info" +rocminfo + +# Cleanup older docker images +cleanup_docker() { + docker_root=$(docker info -f '{{.DockerRootDir}}') + if [ -z "$docker_root" ]; then + echo "Failed to determine Docker root directory." + exit 1 + fi + echo "Docker root directory: $docker_root" + + disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//') + threshold=70 + + if [ "$disk_usage" -gt "$threshold" ]; then + echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..." + docker image prune -f + docker volume prune -f && docker system prune --force --filter "until=72h" --all + echo "Docker images and volumes cleanup completed." + else + echo "Disk usage is below $threshold%. No cleanup needed." + fi +} + +cleanup_docker + +echo "--- Resetting GPUs" +echo "reset" > /opt/amdgpu/etc/gpu_state + +while true; do + sleep 3 + if grep -q clean /opt/amdgpu/etc/gpu_state; then + echo "GPUs state is \"clean\"" + break + fi +done + +echo "--- Pulling/Building container" +image_name="vllm/vllm-omni-rocm-ci:${BUILDKITE_COMMIT}" +container_name="rocm_vllm_omni_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)" + +# Try to pull image first, if it doesn't exist, build it +if ! docker pull "${image_name}" 2>/dev/null; then + echo "Image not found, building from Dockerfile.rocm..." + cd "$(dirname "$0")/../../.." # Go to repo root + docker build \ + -f docker/Dockerfile.rocm \ + -t "${image_name}" \ + --build-arg BUILDKITE_COMMIT="${BUILDKITE_COMMIT}" \ + . 
+fi + +remove_docker_container() { + docker rm -f "${container_name}" || docker image rm -f "${image_name}" || true +} +trap remove_docker_container EXIT + +echo "--- Running container" + +# HuggingFace cache setup +HF_CACHE="$(realpath ~)/huggingface" +mkdir -p "${HF_CACHE}" +HF_MOUNT="/root/.cache/huggingface" + +# Get commands from arguments +commands=$@ +echo "Commands: $commands" + +# Get render group for GPU access +render_gid=$(getent group render | cut -d: -f3) +if [[ -z "$render_gid" ]]; then + echo "Error: 'render' group not found. This is required for GPU access." >&2 + exit 1 +fi + +# Run tests in container +echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES" +docker run \ + --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \ + --network=host \ + --shm-size=16gb \ + --group-add "$render_gid" \ + --rm \ + -e HF_TOKEN \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -v "${HF_CACHE}:${HF_MOUNT}" \ + -e "HF_HOME=${HF_MOUNT}" \ + -e "PYTHONPATH=.." \ + --name "${container_name}" \ + "${image_name}" \ + /bin/bash -c "${commands}" diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml new file mode 100644 index 00000000000..bbe196d8a35 --- /dev/null +++ b/.buildkite/test-amd.yaml @@ -0,0 +1,34 @@ +# AMD Test Pipeline for vLLM-Omni +# +# This file follows vLLM's test-amd.yaml structure and will be processed +# by Jinja templates in the buildkite-ci repository (if configured). 
+# +# Documentation: +# - mirror_hardwares: list of AMD hardware to run tests on [amdexperimental, amdproduction, amdtentative] +# - agent_pool: GPU pool to use (mi325_1 = single MI325X GPU) +# - fast_check: run on every commit +# - timeout_in_minutes: test timeout +# - source_file_dependencies: trigger test only when these files change +# - commands: list of commands to execute +# - working_dir: directory where commands execute (default: /vllm-omni-workspace/tests) + +steps: +##### fast check tests ##### + +- label: "Simple Unit Test" + commands: + - ".buildkite/scripts/simple_test.sh" + agents: + queue: "cpu_queue_premerge" + +# - label: ":rocm: Z-Image Diffusion Model Test" +# mirror_hardwares: [amdexperimental] +# agent_pool: mi325_1 +# timeout_in_minutes: 20 +# fast_check: true +# source_file_dependencies: +# - vllm_omni/ +# - tests/test_diffusion_model.py +# commands: +# - pytest -v -s test_diffusion_model.py +# working_dir: "/vllm-omni-workspace/tests" From d5c75c348dce849362572a68796e17644f0117a9 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Thu, 11 Dec 2025 15:01:08 +0000 Subject: [PATCH 04/32] fix local error Signed-off-by: tjtanaa --- .buildkite/bootstrap-omni-amd.sh | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/.buildkite/bootstrap-omni-amd.sh b/.buildkite/bootstrap-omni-amd.sh index 724050cd477..a6211f0b595 100755 --- a/.buildkite/bootstrap-omni-amd.sh +++ b/.buildkite/bootstrap-omni-amd.sh @@ -481,8 +481,8 @@ echo "" INCLUDED_STEPS=() for ((i=1; i<=TOTAL_STEPS; i++)); do - local label_var="STEP_${i}_LABEL" - local label="${!label_var:-Step $i}" + label_var="STEP_${i}_LABEL" + label="${!label_var:-Step $i}" # Check hardware match if ! 
matches_hardware "$i" "amdexperimental"; then @@ -531,29 +531,29 @@ echo "steps:" >> .buildkite/pipeline.yaml # Generate steps for step_num in "${INCLUDED_STEPS[@]}"; do # Get step data - local label_var="STEP_${step_num}_LABEL" - local label="${!label_var}" + label_var="STEP_${step_num}_LABEL" + label="${!label_var}" - local key_var="STEP_${step_num}_KEY" - local key="${!key_var:-step-${step_num}}" + key_var="STEP_${step_num}_KEY" + key="${!key_var:-step-${step_num}}" - local queue_var="STEP_${step_num}_QUEUE" - local queue="${!queue_var:-}" + queue_var="STEP_${step_num}_QUEUE" + queue="${!queue_var:-}" - local agent_pool_var="STEP_${step_num}_AGENT_POOL" - local agent_pool="${!agent_pool_var:-}" + agent_pool_var="STEP_${step_num}_AGENT_POOL" + agent_pool="${!agent_pool_var:-}" - local timeout_var="STEP_${step_num}_TIMEOUT" - local timeout="${!timeout_var:-10}" + timeout_var="STEP_${step_num}_TIMEOUT" + timeout="${!timeout_var:-10}" - local commands_var="STEP_${step_num}_COMMANDS" - local commands="${!commands_var:-}" + commands_var="STEP_${step_num}_COMMANDS" + commands="${!commands_var:-}" - local working_dir_var="STEP_${step_num}_WORKING_DIR" - local working_dir="${!working_dir_var:-}" + working_dir_var="STEP_${step_num}_WORKING_DIR" + working_dir="${!working_dir_var:-}" # Determine queue - local final_queue="" + final_queue="" if [[ -n "$queue" ]]; then final_queue="$queue" elif [[ -n "$agent_pool" ]]; then @@ -585,9 +585,9 @@ STEP_START # Check if this is an AMD GPU test (needs run-amd-test.sh wrapper) if [[ "$final_queue" == amd_* ]]; then # Build command string for AMD GPU execution - local cmd_string="" - local IFS='||' - local cmd_array=($commands) + cmd_string="" + IFS='||' + cmd_array=($commands) # Add ROCm check prefix cmd_string="(command rocm-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1" @@ -607,8 +607,8 @@ STEP_START else # CPU or build step - direct commands echo " commands:" >> .buildkite/pipeline.yaml - local IFS='||' - local 
cmd_array=($commands) + IFS='||' + cmd_array=($commands) for cmd in "${cmd_array[@]}"; do echo " - \"${cmd}\"" >> .buildkite/pipeline.yaml done @@ -641,8 +641,8 @@ ENV_BLOCK # Add depends_on for non-first steps (depend on build step) if [[ "$step_num" != "${INCLUDED_STEPS[0]}" ]]; then - local first_key_var="STEP_${INCLUDED_STEPS[0]}_KEY" - local first_key="${!first_key_var:-step-${INCLUDED_STEPS[0]}}" + first_key_var="STEP_${INCLUDED_STEPS[0]}_KEY" + first_key="${!first_key_var:-step-${INCLUDED_STEPS[0]}}" echo " depends_on: \"${first_key}\"" >> .buildkite/pipeline.yaml fi done From e2c24b77495661d836fc40ae3d99a34e7a3bbfec Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Thu, 11 Dec 2025 15:16:33 +0000 Subject: [PATCH 05/32] simplify amd test to just test build docker Signed-off-by: tjtanaa --- .buildkite/bootstrap-omni-amd.sh | 677 ------------------ .../scripts/hardware_ci/run-amd-test.sh | 113 --- .buildkite/test-amd.yaml | 43 +- 3 files changed, 11 insertions(+), 822 deletions(-) delete mode 100755 .buildkite/bootstrap-omni-amd.sh delete mode 100755 .buildkite/scripts/hardware_ci/run-amd-test.sh diff --git a/.buildkite/bootstrap-omni-amd.sh b/.buildkite/bootstrap-omni-amd.sh deleted file mode 100755 index a6211f0b595..00000000000 --- a/.buildkite/bootstrap-omni-amd.sh +++ /dev/null @@ -1,677 +0,0 @@ -#!/bin/bash -# vLLM-Omni AMD CI Bootstrap -# Intelligent CI orchestration following vLLM's ci-infra approach -# -# Features: -# - Smart change detection (docs-only skip, critical files) -# - Pure bash YAML parsing -# - Test filtering by source_file_dependencies and mirror_hardwares -# - GitHub PR label support (ready-run-all-tests, ci-no-fail-fast) -# - Dynamic Buildkite pipeline generation - -set -euo pipefail - -#============================================================================== -# SECTION 1: INITIALIZATION & ENVIRONMENT DETECTION -#============================================================================== - -# Enable debugging if requested 
-DEBUG="${VLLM_CI_DEBUG:-0}" -[[ "$DEBUG" == "1" ]] && set -x - -echo "=== vLLM-Omni AMD CI Bootstrap ===" -echo "Timestamp: $(date -u +"%Y-%m-%d %H:%M:%S UTC")" -echo "Branch: ${BUILDKITE_BRANCH:-unknown}" -echo "Commit: ${BUILDKITE_COMMIT:-unknown}" -echo "Pull Request: ${BUILDKITE_PULL_REQUEST:-none}" -echo "" - -# Validate environment -if [ ! -d ".buildkite" ]; then - echo "Error: .buildkite directory not found" - echo "Please run this script from the repository root" - exit 1 -fi - -if [ ! -f ".buildkite/test-amd.yaml" ]; then - echo "Error: .buildkite/test-amd.yaml not found" - exit 1 -fi - -# Validate git repository -if ! git rev-parse --git-dir > /dev/null 2>&1; then - echo "Error: Not a git repository" - exit 1 -fi - -# Determine base branch for comparison -if [[ "${BUILDKITE_PULL_REQUEST:-false}" != "false" ]]; then - BASE_BRANCH="${BUILDKITE_PULL_REQUEST_BASE_BRANCH:-main}" -else - BASE_BRANCH="main" -fi - -echo "Base branch for comparison: ${BASE_BRANCH}" -echo "" - -#============================================================================== -# SECTION 2: GITHUB LABEL CHECKING -#============================================================================== - -# Function: Check GitHub PR labels -check_github_labels() { - local pr_number="$1" - - # Extract owner/repo from git URL - local repo_full_name=$(git remote get-url origin 2>/dev/null | sed -E 's/.*github\.com[:/]([^/]+\/[^/]+)(\.git)?$/\1/' || echo "") - - if [[ -z "$repo_full_name" ]]; then - echo "Warning: Could not determine GitHub repository" - return 1 - fi - - echo "--- Checking GitHub PR labels" - echo "Repository: ${repo_full_name}" - echo "PR Number: ${pr_number}" - - # Try to fetch labels via GitHub API (no auth needed for public repos) - if command -v curl >/dev/null 2>&1; then - local api_url="https://api.github.com/repos/${repo_full_name}/pulls/${pr_number}" - local response=$(curl -s -f "${api_url}" 2>/dev/null || echo "") - - if [[ -n "$response" ]]; then - # Extract label names 
- local labels=$(echo "$response" | grep -o '"name":"[^"]*"' | cut -d'"' -f4 | tr '\n' ',' || echo "") - - # Check for specific labels - if [[ "$labels" == *"ready-run-all-tests"* ]]; then - RUN_ALL_TESTS=1 - echo "✓ Found label: ready-run-all-tests" - fi - - if [[ "$labels" == *"ci-no-fail-fast"* ]]; then - NO_FAIL_FAST=1 - echo "✓ Found label: ci-no-fail-fast" - fi - - [[ "$RUN_ALL_TESTS" == "0" ]] && echo " No run-all-tests label" - [[ "$NO_FAIL_FAST" == "0" ]] && echo " No fail-fast override label" - return 0 - fi - fi - - echo "Warning: Could not fetch GitHub labels (API unavailable or request failed)" - return 1 -} - -# Initialize flags -RUN_ALL_TESTS=0 -NO_FAIL_FAST=0 - -# Check labels if this is a PR -if [[ "${BUILDKITE_PULL_REQUEST:-false}" != "false" ]] && [[ "${BUILDKITE_PULL_REQUEST}" != "" ]]; then - check_github_labels "${BUILDKITE_PULL_REQUEST}" || echo "Continuing without label information" - echo "" -fi - -#============================================================================== -# SECTION 3: CHANGE DETECTION & ANALYSIS -#============================================================================== - -# Function: Detect if only docs changed -is_docs_only_change() { - local changed_files="$1" - - # Docs-related patterns - local docs_patterns=( - "^docs/" - "^README" - "\\.md$" - "\\.rst$" - "^LICENSE" - "^CONTRIBUTING" - ) - - local has_non_docs=0 - - while IFS= read -r file; do - [[ -z "$file" ]] && continue - - local is_docs=0 - for pattern in "${docs_patterns[@]}"; do - if [[ "$file" =~ $pattern ]]; then - is_docs=1 - break - fi - done - - if [[ "$is_docs" == "0" ]]; then - # Found non-docs file - has_non_docs=1 - break - fi - done <<< "$changed_files" - - # Return 0 (success) if all files are docs, 1 otherwise - [[ "$has_non_docs" == "0" ]] -} - -# Function: Detect critical file changes -has_critical_file_changes() { - local changed_files="$1" - - # Critical patterns that trigger full test run - local critical_patterns=( - 
"^docker/Dockerfile" - "^requirements.*\\.txt$" - "^setup\\.py$" - "^pyproject\\.toml$" - "^\\.buildkite/bootstrap-omni-amd\\.sh$" - "^\\.buildkite/test-amd\\.yaml$" - "^\\.buildkite/scripts/hardware_ci/run-amd-test\\.sh$" - ) - - while IFS= read -r file; do - [[ -z "$file" ]] && continue - - for pattern in "${critical_patterns[@]}"; do - if [[ "$file" =~ $pattern ]]; then - echo " Critical file changed: $file" - return 0 - fi - done - done <<< "$changed_files" - - return 1 -} - -echo "--- Analyzing changed files" - -# Get list of changed files -CHANGED_FILES="" -if git rev-parse "origin/${BASE_BRANCH}" >/dev/null 2>&1; then - # Fetch latest base branch - echo "Fetching origin/${BASE_BRANCH}..." - git fetch origin "${BASE_BRANCH}" >/dev/null 2>&1 || true - - # Get changed files between base and current commit - CHANGED_FILES=$(git diff --name-only "origin/${BASE_BRANCH}...${BUILDKITE_COMMIT}" 2>/dev/null || \ - git diff --name-only "origin/${BASE_BRANCH}" "${BUILDKITE_COMMIT}" 2>/dev/null || \ - echo "") -else - echo "Warning: Could not find base branch ${BASE_BRANCH}" - echo "Will run all tests as a safety measure" - RUN_ALL_TESTS=1 -fi - -# Count changed files -CHANGED_FILE_COUNT=$(echo "$CHANGED_FILES" | grep -c . || echo "0") -echo "Changed files: ${CHANGED_FILE_COUNT}" - -# Debug: Show changed files if in debug mode -if [[ "$DEBUG" == "1" ]] && [[ -n "$CHANGED_FILES" ]]; then - echo "Changed files list:" - echo "$CHANGED_FILES" | head -20 - [[ "$CHANGED_FILE_COUNT" -gt 20 ]] && echo "... 
(${CHANGED_FILE_COUNT} total files)" - echo "" -fi - -# Check for docs-only changes (early exit optimization) -if [[ "$CHANGED_FILE_COUNT" -gt 0 ]] && is_docs_only_change "$CHANGED_FILES"; then - echo "" - echo "=== Documentation-only changes detected ===" - echo "Skipping CI tests to save resources" - echo "" - - # Generate minimal pipeline - cat > .buildkite/pipeline.yaml << 'EOF' -steps: - - label: ":memo: Docs-only change" - command: echo "Only documentation changed, skipping tests" - agents: - queue: cpu_queue_premerge -EOF - - echo "--- Generated minimal pipeline:" - cat .buildkite/pipeline.yaml - echo "" - - echo "--- Uploading pipeline to Buildkite" - buildkite-agent pipeline upload .buildkite/pipeline.yaml - - echo "=== Bootstrap Complete (docs-only) ===" - exit 0 -fi - -# Check for critical file changes -if has_critical_file_changes "$CHANGED_FILES"; then - echo " → Critical files detected: Will run ALL tests" - RUN_ALL_TESTS=1 -fi - -echo "" - -#============================================================================== -# SECTION 4: YAML PARSING (Pure Bash) -#============================================================================== - -echo "--- Parsing test-amd.yaml" - -# Parse test-amd.yaml into structured variables -# Variables will be named: STEP__ -# Arrays/lists use || as delimiter - -TOTAL_STEPS=0 -declare -A STEP_DATA - -parse_test_yaml() { - local yaml_file="$1" - - local step_num=0 - local in_step=0 - local current_section="" - local list_indent=0 - - while IFS= read -r raw_line; do - # Handle line without removing all whitespace (need to detect indentation) - local line="$raw_line" - - # Skip comments and empty lines - [[ "$line" =~ ^[[:space:]]*# ]] && continue - [[ -z "${line// /}" ]] && continue - - # Detect indentation level - local indent=$(echo "$line" | sed -E 's/^([[:space:]]*).*/\1/' | wc -c) - indent=$((indent - 1)) # wc -c counts \n - - # Detect new step (starts with "- label:") - if [[ "$line" =~ 
^[[:space:]]*-[[:space:]]+label:[[:space:]]*(.+)$ ]]; then - step_num=$((step_num + 1)) - in_step=1 - local label="${BASH_REMATCH[1]}" - label=$(echo "$label" | sed 's/^["'"'"']//' | sed 's/["'"'"']$//') - - eval "STEP_${step_num}_LABEL=\"\$label\"" - eval "STEP_${step_num}_KEY=\"step-${step_num}\"" - - echo " Step ${step_num}: ${label}" - current_section="" - continue - fi - - [[ "$in_step" == "0" ]] && continue - - # Parse key-value pairs at step level - if [[ "$line" =~ ^[[:space:]]+([a-z_]+):[[:space:]]*(.*)$ ]]; then - local key="${BASH_REMATCH[1]}" - local value="${BASH_REMATCH[2]}" - - # Clean value (remove quotes, brackets) - value=$(echo "$value" | sed 's/^["'"'"'\[]//' | sed 's/["'"'"'\]]*$//') - - case "$key" in - key) - eval "STEP_${step_num}_KEY=\"\$value\"" - ;; - mirror_hardwares) - # Parse array: [amdexperimental, amdproduction] - value=$(echo "$value" | tr ',' ' ' | xargs) - eval "STEP_${step_num}_MIRROR_HARDWARES=\"\$value\"" - ;; - agent_pool) - eval "STEP_${step_num}_AGENT_POOL=\"\$value\"" - ;; - timeout_in_minutes) - eval "STEP_${step_num}_TIMEOUT=\"\$value\"" - ;; - fast_check) - eval "STEP_${step_num}_FAST_CHECK=\"\$value\"" - ;; - working_dir) - eval "STEP_${step_num}_WORKING_DIR=\"\$value\"" - ;; - queue) - # Part of agents section - eval "STEP_${step_num}_QUEUE=\"\$value\"" - ;; - commands) - current_section="commands" - eval "STEP_${step_num}_COMMANDS=\"\"" - list_indent=$indent - ;; - source_file_dependencies) - current_section="dependencies" - eval "STEP_${step_num}_DEPENDENCIES=\"\"" - list_indent=$indent - ;; - agents) - current_section="agents" - ;; - *) - # Check if we left a list section - if [[ "$indent" -le "$list_indent" ]] && [[ -n "$current_section" ]]; then - current_section="" - fi - ;; - esac - # Parse list items (- item) - elif [[ "$line" =~ ^[[:space:]]*-[[:space:]]+(.+)$ ]]; then - local item="${BASH_REMATCH[1]}" - item=$(echo "$item" | sed 's/^["'"'"']//' | sed 's/["'"'"']$//') - - if [[ "$current_section" == 
"commands" ]]; then - local current_cmds - eval "current_cmds=\"\$STEP_${step_num}_COMMANDS\"" - if [[ -n "$current_cmds" ]]; then - eval "STEP_${step_num}_COMMANDS=\"\${current_cmds}||\$item\"" - else - eval "STEP_${step_num}_COMMANDS=\"\$item\"" - fi - elif [[ "$current_section" == "dependencies" ]]; then - local current_deps - eval "current_deps=\"\$STEP_${step_num}_DEPENDENCIES\"" - if [[ -n "$current_deps" ]]; then - eval "STEP_${step_num}_DEPENDENCIES=\"\${current_deps}||\$item\"" - else - eval "STEP_${step_num}_DEPENDENCIES=\"\$item\"" - fi - fi - fi - done < "$yaml_file" - - TOTAL_STEPS=$step_num -} - -parse_test_yaml ".buildkite/test-amd.yaml" - -echo "Parsed ${TOTAL_STEPS} steps from test-amd.yaml" - -# Validate parsing -if [[ "$TOTAL_STEPS" == "0" ]]; then - echo "Error: No steps found in test-amd.yaml" - exit 1 -fi - -echo "" - -#============================================================================== -# SECTION 5: TEST FILTERING & SELECTION -#============================================================================== - -# Function: Check if step should run based on file dependencies -should_run_step() { - local step_num="$1" - - # If RUN_ALL_TESTS is set, always run - if [[ "$RUN_ALL_TESTS" == "1" ]]; then - return 0 - fi - - # Get step dependencies - local deps_var="STEP_${step_num}_DEPENDENCIES" - local dependencies="${!deps_var:-}" - - # If no dependencies specified, always run (catch-all test) - if [[ -z "$dependencies" ]]; then - return 0 - fi - - # If no files changed, don't run - if [[ -z "$CHANGED_FILES" ]] || [[ "$CHANGED_FILE_COUNT" == "0" ]]; then - return 1 - fi - - # Check if any changed file matches dependencies - # Dependencies use prefix matching (e.g., "vllm_omni/" matches "vllm_omni/diffusion/model.py") - local IFS='||' - local deps_array=($dependencies) - - for dep_pattern in "${deps_array[@]}"; do - # Remove trailing slashes for consistency - dep_pattern="${dep_pattern%/}" - - while IFS= read -r changed_file; do - [[ -z 
"$changed_file" ]] && continue - - # Check if changed file matches dependency pattern - # Support both prefix matching and exact file matching - if [[ "$changed_file" == "${dep_pattern}"* ]] || [[ "$changed_file" == "$dep_pattern" ]]; then - if [[ "$DEBUG" == "1" ]]; then - echo " Match: ${changed_file} ↔ ${dep_pattern}" - fi - return 0 - fi - done <<< "$CHANGED_FILES" - done - - return 1 -} - -# Function: Check if step matches hardware filter -matches_hardware() { - local step_num="$1" - local target_hardware="${2:-amdexperimental}" - - # Get mirror_hardwares for this step - local hw_var="STEP_${step_num}_MIRROR_HARDWARES" - local hardwares="${!hw_var:-}" - - # If no hardware specified, it's a non-AMD step (CPU/build steps) - # These should always be included - if [[ -z "$hardwares" ]]; then - return 0 - fi - - # Check if target hardware is in the list - if [[ "$hardwares" == *"$target_hardware"* ]]; then - return 0 - fi - - return 1 -} - -echo "--- Filtering tests" -echo "Filter criteria:" -echo " Target hardware: amdexperimental" -echo " RUN_ALL_TESTS: ${RUN_ALL_TESTS}" -echo " Changed files: ${CHANGED_FILE_COUNT}" -echo "" - -# Build list of steps to include -INCLUDED_STEPS=() - -for ((i=1; i<=TOTAL_STEPS; i++)); do - label_var="STEP_${i}_LABEL" - label="${!label_var:-Step $i}" - - # Check hardware match - if ! 
matches_hardware "$i" "amdexperimental"; then - echo " Step ${i} (${label}): ✗ Wrong hardware - SKIP" - continue - fi - - # Check file dependencies - if should_run_step "$i"; then - echo " Step ${i} (${label}): ✓ INCLUDE" - INCLUDED_STEPS+=("$i") - else - echo " Step ${i} (${label}): ✗ No matching changes - SKIP" - fi -done - -echo "" -echo "Selected ${#INCLUDED_STEPS[@]} of ${TOTAL_STEPS} steps" - -# Ensure at least build step (step 1) is included -if [[ "${#INCLUDED_STEPS[@]}" == "0" ]]; then - echo "Warning: No tests selected, including build step as fallback" - INCLUDED_STEPS=(1) -fi - -echo "" - -#============================================================================== -# SECTION 6: PIPELINE GENERATION -#============================================================================== - -echo "--- Generating Buildkite pipeline" - -# Start pipeline file -cat > .buildkite/pipeline.yaml << 'PIPELINE_HEADER' -# Auto-generated by bootstrap-omni-amd.sh -# DO NOT EDIT MANUALLY - Edit .buildkite/test-amd.yaml instead -# -# Generated: -PIPELINE_HEADER - -echo "# $(date -u)" >> .buildkite/pipeline.yaml -echo "" >> .buildkite/pipeline.yaml -echo "steps:" >> .buildkite/pipeline.yaml - -# Generate steps -for step_num in "${INCLUDED_STEPS[@]}"; do - # Get step data - label_var="STEP_${step_num}_LABEL" - label="${!label_var}" - - key_var="STEP_${step_num}_KEY" - key="${!key_var:-step-${step_num}}" - - queue_var="STEP_${step_num}_QUEUE" - queue="${!queue_var:-}" - - agent_pool_var="STEP_${step_num}_AGENT_POOL" - agent_pool="${!agent_pool_var:-}" - - timeout_var="STEP_${step_num}_TIMEOUT" - timeout="${!timeout_var:-10}" - - commands_var="STEP_${step_num}_COMMANDS" - commands="${!commands_var:-}" - - working_dir_var="STEP_${step_num}_WORKING_DIR" - working_dir="${!working_dir_var:-}" - - # Determine queue - final_queue="" - if [[ -n "$queue" ]]; then - final_queue="$queue" - elif [[ -n "$agent_pool" ]]; then - # Map agent_pool to queue name - case "$agent_pool" in - mi325_1) 
final_queue="amd_mi325_1" ;; - mi325_2) final_queue="amd_mi325_2" ;; - mi325_4) final_queue="amd_mi325_4" ;; - mi325_8) final_queue="amd_mi325_8" ;; - *) final_queue="amd_${agent_pool}" ;; - esac - else - # Default queue for steps without specification - final_queue="cpu_queue_premerge" - fi - - # Generate step YAML - cat >> .buildkite/pipeline.yaml << STEP_START - - - label: "${label}" - key: "${key}" - agents: - queue: "${final_queue}" - cluster: "CI" -STEP_START - - # Add commands - if [[ -n "$commands" ]]; then - # Check if this is an AMD GPU test (needs run-amd-test.sh wrapper) - if [[ "$final_queue" == amd_* ]]; then - # Build command string for AMD GPU execution - cmd_string="" - IFS='||' - cmd_array=($commands) - - # Add ROCm check prefix - cmd_string="(command rocm-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1" - - # Add working directory if specified - if [[ -n "$working_dir" ]]; then - cmd_string="${cmd_string} && cd ${working_dir}" - fi - - # Join commands with && - for cmd in "${cmd_array[@]}"; do - cmd_string="${cmd_string} && ${cmd}" - done - - # Wrap in run-amd-test.sh - echo " command: bash .buildkite/scripts/hardware_ci/run-amd-test.sh \"${cmd_string}\"" >> .buildkite/pipeline.yaml - else - # CPU or build step - direct commands - echo " commands:" >> .buildkite/pipeline.yaml - IFS='||' - cmd_array=($commands) - for cmd in "${cmd_array[@]}"; do - echo " - \"${cmd}\"" >> .buildkite/pipeline.yaml - done - fi - else - # No commands specified - echo " command: echo \"No commands specified for this step\"" >> .buildkite/pipeline.yaml - fi - - # Add timeout - echo " timeout_in_minutes: ${timeout}" >> .buildkite/pipeline.yaml - - # Add retry for AMD GPU tests - if [[ "$final_queue" == amd_* ]]; then - cat >> .buildkite/pipeline.yaml << 'RETRY_BLOCK' - retry: - automatic: - - exit_status: "*" - limit: 1 -RETRY_BLOCK - fi - - # Add environment variables for AMD GPU tests - if [[ "$final_queue" == amd_* ]]; then - cat >> .buildkite/pipeline.yaml 
<< 'ENV_BLOCK' - env: - HF_HOME: "/root/.cache/huggingface" -ENV_BLOCK - fi - - # Add depends_on for non-first steps (depend on build step) - if [[ "$step_num" != "${INCLUDED_STEPS[0]}" ]]; then - first_key_var="STEP_${INCLUDED_STEPS[0]}_KEY" - first_key="${!first_key_var:-step-${INCLUDED_STEPS[0]}}" - echo " depends_on: \"${first_key}\"" >> .buildkite/pipeline.yaml - fi -done - -# Validate generated pipeline -if ! grep -q "^steps:" .buildkite/pipeline.yaml; then - echo "Error: Generated pipeline is invalid (missing 'steps:' section)" - exit 1 -fi - -echo "Pipeline generated successfully" -echo "" - -# Display generated pipeline -echo "--- Generated Pipeline:" -cat .buildkite/pipeline.yaml -echo "" - -# Upload pipeline to Buildkite -echo "--- Uploading pipeline to Buildkite" -buildkite-agent pipeline upload .buildkite/pipeline.yaml - -echo "" -echo "=== Bootstrap Complete ===" -echo "Configuration:" -echo " Total steps defined: ${TOTAL_STEPS}" -echo " Steps selected: ${#INCLUDED_STEPS[@]}" -echo " RUN_ALL_TESTS: ${RUN_ALL_TESTS}" -echo " NO_FAIL_FAST: ${NO_FAIL_FAST}" -echo " Changed files: ${CHANGED_FILE_COUNT}" -echo "" -echo "Pipeline uploaded successfully!" diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh deleted file mode 100755 index faf51234619..00000000000 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash - -# This script runs tests inside the ROCm docker container for vLLM-Omni. -# Adapted from vLLM's run-amd-test.sh for vllm-omni's simpler use case. - -set -o pipefail - -# Export Python path -export PYTHONPATH=".." 
- -# Print ROCm version -echo "--- Confirming Clean Initial State" -while true; do - sleep 3 - if grep -q clean /opt/amdgpu/etc/gpu_state; then - echo "GPUs state is \"clean\"" - break - fi -done - -echo "--- ROCm info" -rocminfo - -# Cleanup older docker images -cleanup_docker() { - docker_root=$(docker info -f '{{.DockerRootDir}}') - if [ -z "$docker_root" ]; then - echo "Failed to determine Docker root directory." - exit 1 - fi - echo "Docker root directory: $docker_root" - - disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//') - threshold=70 - - if [ "$disk_usage" -gt "$threshold" ]; then - echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..." - docker image prune -f - docker volume prune -f && docker system prune --force --filter "until=72h" --all - echo "Docker images and volumes cleanup completed." - else - echo "Disk usage is below $threshold%. No cleanup needed." - fi -} - -cleanup_docker - -echo "--- Resetting GPUs" -echo "reset" > /opt/amdgpu/etc/gpu_state - -while true; do - sleep 3 - if grep -q clean /opt/amdgpu/etc/gpu_state; then - echo "GPUs state is \"clean\"" - break - fi -done - -echo "--- Pulling/Building container" -image_name="vllm/vllm-omni-rocm-ci:${BUILDKITE_COMMIT}" -container_name="rocm_vllm_omni_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)" - -# Try to pull image first, if it doesn't exist, build it -if ! docker pull "${image_name}" 2>/dev/null; then - echo "Image not found, building from Dockerfile.rocm..." - cd "$(dirname "$0")/../../.." # Go to repo root - docker build \ - -f docker/Dockerfile.rocm \ - -t "${image_name}" \ - --build-arg BUILDKITE_COMMIT="${BUILDKITE_COMMIT}" \ - . 
-fi - -remove_docker_container() { - docker rm -f "${container_name}" || docker image rm -f "${image_name}" || true -} -trap remove_docker_container EXIT - -echo "--- Running container" - -# HuggingFace cache setup -HF_CACHE="$(realpath ~)/huggingface" -mkdir -p "${HF_CACHE}" -HF_MOUNT="/root/.cache/huggingface" - -# Get commands from arguments -commands=$@ -echo "Commands: $commands" - -# Get render group for GPU access -render_gid=$(getent group render | cut -d: -f3) -if [[ -z "$render_gid" ]]; then - echo "Error: 'render' group not found. This is required for GPU access." >&2 - exit 1 -fi - -# Run tests in container -echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES" -docker run \ - --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \ - --network=host \ - --shm-size=16gb \ - --group-add "$render_gid" \ - --rm \ - -e HF_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -v "${HF_CACHE}:${HF_MOUNT}" \ - -e "HF_HOME=${HF_MOUNT}" \ - -e "PYTHONPATH=.." \ - --name "${container_name}" \ - "${image_name}" \ - /bin/bash -c "${commands}" diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index bbe196d8a35..625d59748a8 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1,34 +1,13 @@ -# AMD Test Pipeline for vLLM-Omni -# -# This file follows vLLM's test-amd.yaml structure and will be processed -# by Jinja templates in the buildkite-ci repository (if configured). 
-# -# Documentation: -# - mirror_hardwares: list of AMD hardware to run tests on [amdexperimental, amdproduction, amdtentative] -# - agent_pool: GPU pool to use (mi325_1 = single MI325X GPU) -# - fast_check: run on every commit -# - timeout_in_minutes: test timeout -# - source_file_dependencies: trigger test only when these files change -# - commands: list of commands to execute -# - working_dir: directory where commands execute (default: /vllm-omni-workspace/tests) - steps: -##### fast check tests ##### - -- label: "Simple Unit Test" - commands: - - ".buildkite/scripts/simple_test.sh" - agents: - queue: "cpu_queue_premerge" + - label: ":docker: Build image" + key: image-build + commands: + - "docker build -f docker/Dockerfile.rocm -t vllm-omni-rocm ." + agents: + queue: "cpu_queue_premerge_us_east_1" -# - label: ":rocm: Z-Image Diffusion Model Test" -# mirror_hardwares: [amdexperimental] -# agent_pool: mi325_1 -# timeout_in_minutes: 20 -# fast_check: true -# source_file_dependencies: -# - vllm_omni/ -# - tests/test_diffusion_model.py -# commands: -# - pytest -v -s test_diffusion_model.py -# working_dir: "/vllm-omni-workspace/tests" + - label: "Simple Unit Test" + commands: + - ".buildkite/scripts/simple_test.sh" + agents: + queue: "cpu_queue_premerge" From 46da88092adb73305b6f0c4f34da4528f19bf32c Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Thu, 11 Dec 2025 15:33:02 +0000 Subject: [PATCH 06/32] use amd-cpu to build image like in vLLM Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 625d59748a8..cfa4c4cd012 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -4,7 +4,7 @@ steps: commands: - "docker build -f docker/Dockerfile.rocm -t vllm-omni-rocm ." 
agents: - queue: "cpu_queue_premerge_us_east_1" + queue: "amd-cpu" - label: "Simple Unit Test" commands: From bb03847a19bcca0b2a3059b2cc266dc3a864e68d Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Fri, 12 Dec 2025 11:54:47 +0000 Subject: [PATCH 07/32] apply review feedback Co-authored-by: Hongxia Yang Signed-off-by: tjtanaa --- docker/Dockerfile.rocm | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 6e774135e26..0c8fb1ab419 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -1,12 +1,11 @@ -ARG ROCM_BASE_IMAGE=rocm/vllm-dev -ARG ROCM_BASE_TAG=nightly_main_20251005 -FROM ${ROCM_BASE_IMAGE}:${ROCM_BASE_TAG} +ARG BASE_IMAGE=rocm/vllm-dev:nightly_main_20251005 +FROM ${BASE_IMAGE} -ARG APP_DIR=/workspace/vllm-omni +ARG COMMON_WORKDIR=/app ARG VLLM_VERSION=v0.11.0 ARG PYTORCH_ROCM_ARCH="gfx942;gfx950" -WORKDIR ${APP_DIR} +WORKDIR ${COMMON_WORKDIR} # Step 1: Setup - Install system dependencies RUN apt-get update && \ @@ -15,27 +14,23 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* # Step 2: Reinstall vllm from source -RUN cd ../ && python3 -m pip uninstall -y vllm && \ +RUN python3 -m pip uninstall -y vllm && rm -rf vllm &&\ git clone https://github.com/vllm-project/vllm.git && \ cd vllm && \ git checkout ${VLLM_VERSION} && \ - python3 -c "import setuptools_scm; print(setuptools_scm.get_version())" && \ + python3 -m pip install -r requirements/rocm.txt && \ + python3 setup.py clean --all && \ PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} python3 setup.py develop && \ - cd / && \ + cd ../ && \ rm -rf vllm/.git +RUN mkdir -p ${COMMON_WORKDIR}/vllm-omni + # Step 3: Copy vllm-omni code and install without uv -COPY . ${APP_DIR} -RUN python3 -m pip install --no-cache-dir ".[dev]" +COPY . 
${COMMON_WORKDIR}/vllm-omni +RUN cd ${COMMON_WORKDIR}/vllm-omni && python3 -m pip install --no-cache-dir ".[dev]" # Create python symlink RUN ln -sf /usr/bin/python3 /usr/bin/python -# Step 4: Set environment variables for ROCm optimization -ENV MIOPEN_FIND_MODE=FAST -ENV VLLM_ROCM_USE_AITER=1 -ENV VLLM_ROCM_USE_AITER_MHA=1 -ENV VLLM_ROCM_USE_AITER_LINEAR=0 -ENV VLLM_ROCM_USE_AITER_RMSNORM=0 - ENTRYPOINT [] \ No newline at end of file From 055090d7bfc13c4f56fea591431854d5c20a84a1 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Fri, 12 Dec 2025 11:56:01 +0000 Subject: [PATCH 08/32] fix precommit Signed-off-by: tjtanaa --- docker/Dockerfile.rocm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 0c8fb1ab419..872fb0c049a 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -33,4 +33,4 @@ RUN cd ${COMMON_WORKDIR}/vllm-omni && python3 -m pip install --no-cache-dir ".[d # Create python symlink RUN ln -sf /usr/bin/python3 /usr/bin/python -ENTRYPOINT [] \ No newline at end of file +ENTRYPOINT [] From e8374b5ee998289e96455456c15e6a6fd3623412 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Fri, 12 Dec 2025 12:14:34 +0000 Subject: [PATCH 09/32] test pushing CI docker Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index cfa4c4cd012..cd527621557 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -2,7 +2,10 @@ steps: - label: ":docker: Build image" key: image-build commands: - - "docker build -f docker/Dockerfile.rocm -t vllm-omni-rocm ." + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker build -f docker/Dockerfile.rocm -t vllm-omni-rocm-ci ." 
+ - "docker tag vllm-omni-rocm-ci public.ecr.aws/q9t5s3a7/vllm-rocm-ci-test-repo:$BUILDKITE_COMMIT" + - "docker push public.ecr.aws/q9t5s3a7/vllm-rocm-ci-test-repo:$BUILDKITE_COMMIT" agents: queue: "amd-cpu" From 02e68e22d4009379b9128c401a578bccf63d6e00 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Fri, 12 Dec 2025 12:21:56 +0000 Subject: [PATCH 10/32] try using cpu_queue_premerge_us_east_1 to build image Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index cd527621557..67741c5ece1 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -7,7 +7,7 @@ steps: - "docker tag vllm-omni-rocm-ci public.ecr.aws/q9t5s3a7/vllm-rocm-ci-test-repo:$BUILDKITE_COMMIT" - "docker push public.ecr.aws/q9t5s3a7/vllm-rocm-ci-test-repo:$BUILDKITE_COMMIT" agents: - queue: "amd-cpu" + queue: "cpu_queue_premerge_us_east_1" - label: "Simple Unit Test" commands: From 57661bd3d98453d43fb72737c882b1d642768d19 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Mon, 15 Dec 2025 08:57:01 +0000 Subject: [PATCH 11/32] add preliminary script to run amd ci Signed-off-by: tjtanaa --- .buildkite/bootstrap-amd-omni.sh | 238 +++++++++++++ .../scripts/hardware_ci/run-amd-test.sh | 240 +++++++++++++ .buildkite/test-amd.yaml | 24 +- .buildkite/test-template-amd-omni.j2 | 335 ++++++++++++++++++ 4 files changed, 823 insertions(+), 14 deletions(-) create mode 100755 .buildkite/bootstrap-amd-omni.sh create mode 100755 .buildkite/scripts/hardware_ci/run-amd-test.sh create mode 100644 .buildkite/test-template-amd-omni.j2 diff --git a/.buildkite/bootstrap-amd-omni.sh b/.buildkite/bootstrap-amd-omni.sh new file mode 100755 index 00000000000..a38b7622011 --- /dev/null +++ b/.buildkite/bootstrap-amd-omni.sh @@ -0,0 +1,238 @@ +#!/bin/bash +# vllm-omni customized version +# Based on: https://github.com/vllm-project/ci-infra/blob/main/buildkite/bootstrap-amd.sh +# Last synced: 2025-12-15 +# 
Modifications: Use local template file instead of downloading from ci-infra
+
+set -euo pipefail
+
+if [[ -z "${RUN_ALL:-}" ]]; then
+    RUN_ALL=0
+fi
+
+if [[ -z "${NIGHTLY:-}" ]]; then
+    NIGHTLY=0
+fi
+
+if [[ -z "${VLLM_CI_BRANCH:-}" ]]; then
+    VLLM_CI_BRANCH="main"
+fi
+
+if [[ -z "${AMD_MIRROR_HW:-}" ]]; then
+    AMD_MIRROR_HW="amdproduction"
+fi
+
+if [[ -z "${DOCS_ONLY_DISABLE:-}" ]]; then
+    DOCS_ONLY_DISABLE=0
+fi
+
+fail_fast() {  # prints "true" only for PR builds that lack the ci-no-fail-fast label, otherwise "false"
+    DISABLE_LABEL="ci-no-fail-fast"
+    # For PR builds (BUILDKITE_PULL_REQUEST set and != "false") check the PR labels using curl and jq
+    if [ "${BUILDKITE_PULL_REQUEST:-false}" != "false" ]; then  # default keeps `set -u` from aborting when unset
+        PR_LABELS=$(curl -s "https://api.github.com/repos/vllm-project/vllm-omni/pulls/$BUILDKITE_PULL_REQUEST" | jq -r '.labels[].name')
+        if [[ $PR_LABELS == *"$DISABLE_LABEL"* ]]; then
+            echo false
+        else
+            echo true
+        fi
+    else
+        echo false # not a PR or BUILDKITE_PULL_REQUEST not set
+    fi
+}
+
+check_run_all_label() {  # prints "true" only for PR builds labelled ready-run-all-tests, otherwise "false"
+    RUN_ALL_LABEL="ready-run-all-tests"
+    # For PR builds (BUILDKITE_PULL_REQUEST set and != "false") check the PR labels using curl and jq
+    if [ "${BUILDKITE_PULL_REQUEST:-false}" != "false" ]; then  # default keeps `set -u` from aborting when unset
+        PR_LABELS=$(curl -s "https://api.github.com/repos/vllm-project/vllm-omni/pulls/$BUILDKITE_PULL_REQUEST" | jq -r '.labels[].name')
+        if [[ $PR_LABELS == *"$RUN_ALL_LABEL"* ]]; then
+            echo true
+        else
+            echo false
+        fi
+    else
+        echo false # not a PR or BUILDKITE_PULL_REQUEST not set
+    fi
+}
+
+if [[ -z "${COV_ENABLED:-}" ]]; then
+    COV_ENABLED=0
+fi
+
+upload_pipeline() {  # render pipeline.yaml (generator script if present, else the Jinja template), upload it, exit 0
+    echo "Uploading pipeline..."
+    # Install minijinja
+    ls .buildkite || buildkite-agent annotate --style error 'Please merge upstream main branch for buildkite CI'
+    curl -sSfL https://github.com/mitsuhiko/minijinja/releases/download/2.3.1/minijinja-cli-installer.sh | sh
+    source /var/lib/buildkite-agent/.cargo/env
+
+    if [[ $BUILDKITE_PIPELINE_SLUG == "fastcheck" ]]; then
+        AMD_MIRROR_HW="amdtentative"
+    fi
+
+    # Use local template file for vllm-omni
+    cp .buildkite/test-template-amd-omni.j2 .buildkite/test-template.j2
+
+
+    # (WIP) Use pipeline generator instead of jinja template
+    if [ -e ".buildkite/pipeline_generator/pipeline_generator.py" ]; then
+        python -m pip install click pydantic
+        python .buildkite/pipeline_generator/pipeline_generator.py --run_all=$RUN_ALL --list_file_diff="$LIST_FILE_DIFF" --nightly="$NIGHTLY" --mirror_hw="$AMD_MIRROR_HW"
+        buildkite-agent pipeline upload .buildkite/pipeline.yaml
+        exit 0
+    fi
+    echo "List file diff: $LIST_FILE_DIFF"
+    echo "Run all: $RUN_ALL"
+    echo "Nightly: $NIGHTLY"
+    echo "AMD Mirror HW: $AMD_MIRROR_HW"
+
+    FAIL_FAST=$(fail_fast)
+
+    cd .buildkite
+    (
+        set -x
+        # Output pipeline.yaml with all blank lines removed
+        minijinja-cli test-template.j2 test-amd.yaml \
+            -D branch="$BUILDKITE_BRANCH" \
+            -D list_file_diff="$LIST_FILE_DIFF" \
+            -D run_all="$RUN_ALL" \
+            -D nightly="$NIGHTLY" \
+            -D mirror_hw="$AMD_MIRROR_HW" \
+            -D fail_fast="$FAIL_FAST" \
+            -D vllm_use_precompiled="$VLLM_USE_PRECOMPILED" \
+            -D vllm_merge_base_commit="$(git merge-base origin/main HEAD)" \
+            -D cov_enabled="$COV_ENABLED" \
+            -D vllm_ci_branch="$VLLM_CI_BRANCH" \
+            | sed '/^[[:space:]]*$/d' \
+            > pipeline.yaml
+    )
+    cat pipeline.yaml
+    buildkite-agent artifact upload pipeline.yaml
+    buildkite-agent pipeline upload pipeline.yaml
+    exit 0
+}
+
+get_diff() {  # print, space-separated on one line, the paths changed relative to the merge-base with origin/main
+    git add . >/dev/null  # run git directly; the old $(git add .) would have executed git's stdout as a command
+    echo $(git diff --name-only --diff-filter=ACMDR $(git merge-base origin/main HEAD))
+}
+
+get_diff_main() {  # same as get_diff, but relative to the previous commit (HEAD~1)
+    git add . >/dev/null  # run git directly; the old $(git add .) would have executed git's stdout as a command
+    echo $(git diff --name-only --diff-filter=ACMDR HEAD~1)
+}
+
+file_diff=$(get_diff)
+if [[ $BUILDKITE_BRANCH == "main" ]]; then
+    file_diff=$(get_diff_main)
+fi
+
+# ----------------------------------------------------------------------
+# Early exit start: skip pipeline if conditions are met
+# ----------------------------------------------------------------------
+
+# skip pipeline if all changed files are under docs/
+if [[ "${DOCS_ONLY_DISABLE}" != "1" ]]; then
+    if [[ -n "${file_diff:-}" ]]; then
+        docs_only=1
+        # Iterate one path per line (file_diff is space-separated, so split on spaces and strip CRs)
+        while IFS= read -r f; do
+            [[ -z "$f" ]] && continue
+            # **Policy:** only skip if *every* path starts with docs/
+            if [[ "$f" != docs/* ]]; then
+                docs_only=0
+                break
+            fi
+        done < <(printf '%s\n' "$file_diff" | tr ' ' '\n' | tr -d '\r')
+
+        if [[ "$docs_only" -eq 1 ]]; then
+            buildkite-agent annotate ":memo: CI skipped — docs/** only changes detected
+
+\`\`\`
+${file_diff}
+\`\`\`" --style "info" || true
+            echo "[docs-only] All changes are under docs/. Exiting before pipeline upload."
+            exit 0
+        fi
+    fi
+fi
+
+# ----------------------------------------------------------------------
+# Early exit end
+# ----------------------------------------------------------------------
+
+patterns=(
+    "docker/Dockerfile"
+    "CMakeLists.txt"
+    "requirements/common.txt"
+    "requirements/cuda.txt"
+    "requirements/build.txt"
+    "requirements/test.txt"
+    "setup.py"
+    "csrc/"
+    "cmake/"
+)
+
+ignore_patterns=(
+    "docker/Dockerfile."
+    "csrc/cpu"
+    "csrc/rocm"
+    "cmake/hipify.py"
+    "cmake/cpu_extension.cmake"
+)
+
+for file in $file_diff; do
+    # First check if file starts with any pattern (pattern is quoted, so it is compared literally)
+    matches_pattern=0
+    for pattern in "${patterns[@]}"; do
+        if [[ $file == "$pattern"* ]]; then
+            matches_pattern=1
+            break
+        fi
+    done
+
+    # If file matches pattern, check it's not in ignore patterns
+    if [[ $matches_pattern -eq 1 ]]; then
+        matches_ignore=0
+        for ignore in "${ignore_patterns[@]}"; do
+            if [[ $file == "$ignore"* ]]; then
+                matches_ignore=1
+                break
+            fi
+        done
+
+        if [[ $matches_ignore -eq 0 ]]; then
+            RUN_ALL=1
+            echo "Found changes: $file. Run all tests"
+            break
+        fi
+    fi
+done
+
+# Check for ready-run-all-tests label
+LABEL_RUN_ALL=$(check_run_all_label)
+if [[ $LABEL_RUN_ALL == true ]]; then
+    RUN_ALL=1
+    NIGHTLY=1
+    echo "Found 'ready-run-all-tests' label. Running all tests including optional tests."
+fi
+
+# Decide whether to use precompiled wheels
+# Relies on existing patterns array as a basis.
+if [[ -n "${VLLM_USE_PRECOMPILED:-}" ]]; then
+    echo "VLLM_USE_PRECOMPILED is already set to: $VLLM_USE_PRECOMPILED"
+elif [[ $RUN_ALL -eq 1 ]]; then
+    export VLLM_USE_PRECOMPILED=0
+    echo "Detected critical changes, building wheels from source"
+else
+    export VLLM_USE_PRECOMPILED=1
+    echo "No critical changes, using precompiled wheels"
+fi
+
+
+LIST_FILE_DIFF=$(get_diff | tr ' ' '|')
+if [[ $BUILDKITE_BRANCH == "main" ]]; then
+    LIST_FILE_DIFF=$(get_diff_main | tr ' ' '|')
+fi
+upload_pipeline
diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh
new file mode 100755
index 00000000000..64e97011768
--- /dev/null
+++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh
@@ -0,0 +1,240 @@
+#!/bin/bash
+# vllm-omni customized version
+# Based on: vllm/.buildkite/scripts/hardware_ci/run-amd-test.sh
+# Last synced: 2025-12-15
+# Modifications: docker image name for vllm-omni
+
+# This script runs the given test commands inside the corresponding ROCm docker container.
+set -o pipefail
+
+# Export Python path
+export PYTHONPATH=".."
+
+# Poll every 3 s until /opt/amdgpu/etc/gpu_state contains "clean" (no timeout)
+echo "--- Confirming Clean Initial State"
+while true; do
+  sleep 3
+  if grep -q clean /opt/amdgpu/etc/gpu_state; then
+    echo "GPUs state is \"clean\""
+    break
+  fi
+done
+
+echo "--- ROCm info"
+rocminfo
+
+# Prune Docker images/volumes when the filesystem holding Docker's root dir is more than 70% full
+cleanup_docker() {
+  # Get Docker's root directory
+  docker_root=$(docker info -f '{{.DockerRootDir}}')
+  if [ -z "$docker_root" ]; then
+    echo "Failed to determine Docker root directory."
+    exit 1
+  fi
+  echo "Docker root directory: $docker_root"
+  # Check disk usage of the filesystem where Docker's root directory is located
+  disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
+  # Usage percentage above which pruning is triggered
+  threshold=70
+  if [ "$disk_usage" -gt "$threshold" ]; then
+    echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
+    # Remove dangling images (those that are not tagged and not used by any container)
+    docker image prune -f
+    # Remove unused volumes / force the system prune for old images as well.
+    docker volume prune -f && docker system prune --force --filter "until=72h" --all
+    echo "Docker images and volumes cleanup completed."
+  else
+    echo "Disk usage is below $threshold%. No cleanup needed."
+  fi
+}
+
+# Call the cleanup docker function
+cleanup_docker
+
+echo "--- Resetting GPUs"
+
+echo "reset" > /opt/amdgpu/etc/gpu_state
+
+while true; do  # same 3 s poll as above, now waiting for the reset to report "clean"
+  sleep 3
+  if grep -q clean /opt/amdgpu/etc/gpu_state; then
+    echo "GPUs state is \"clean\""
+    break
+  fi
+done
+
+echo "--- Pulling container"
+image_name="public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:${BUILDKITE_COMMIT}-rocm-omni"
+container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
+docker pull "${image_name}"
+
+remove_docker_container() {  # EXIT trap: force-remove the container; the image is removed only if that fails
+  docker rm -f "${container_name}" || docker image rm -f "${image_name}" || true
+}
+trap remove_docker_container EXIT
+
+echo "--- Running container"
+
+HF_CACHE="$(realpath ~)/huggingface"
+mkdir -p "${HF_CACHE}"
+HF_MOUNT="/root/.cache/huggingface"
+
+commands=$@  # all script arguments flattened into one string
+echo "Commands:$commands"
+
+commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"pytest -v -s basic_correctness/test_basic_correctness.py"}  # no-op: pattern and replacement are identical
+
+if [[ $commands == *"pytest -v -s models/test_registry.py"* ]]; then
+  commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"}
+fi
+
+commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"pytest -v -s compile/test_basic_correctness.py"}  # no-op: pattern and replacement are identical
+
+if [[ $commands == *"pytest -v -s lora"* ]]; then
+  commands=${commands//"pytest -v -s lora"/"VLLM_ROCM_CUSTOM_PAGED_ATTN=0 pytest -v -s lora"}
+fi
+
+# Ignore certain kernels tests: --ignore flags are appended to the end of the whole command string
+if [[ $commands == *" kernels/core"* ]]; then
+  commands="${commands} \
+  --ignore=kernels/core/test_fused_quant_layernorm.py \
+  --ignore=kernels/core/test_permute_cols.py"
+fi
+
+if [[ $commands == *" kernels/attention"* ]]; then
+  commands="${commands} \
+  --ignore=kernels/attention/test_attention_selector.py \
+  --ignore=kernels/attention/test_encoder_decoder_attn.py \
+  --ignore=kernels/attention/test_flash_attn.py \
+  --ignore=kernels/attention/test_flashinfer.py \
+  --ignore=kernels/attention/test_prefix_prefill.py \
+  --ignore=kernels/attention/test_cascade_flash_attn.py \
+  --ignore=kernels/attention/test_mha_attn.py \
+  --ignore=kernels/attention/test_lightning_attn.py \
+  --ignore=kernels/attention/test_attention.py"
+fi
+
+if [[ $commands == *" kernels/quantization"* ]]; then
+  commands="${commands} \
+  --ignore=kernels/quantization/test_int8_quant.py \
+  --ignore=kernels/quantization/test_machete_mm.py \
+  --ignore=kernels/quantization/test_block_fp8.py \
+  --ignore=kernels/quantization/test_block_int8.py \
+  --ignore=kernels/quantization/test_marlin_gemm.py \
+  --ignore=kernels/quantization/test_cutlass_scaled_mm.py \
+  --ignore=kernels/quantization/test_int8_kernel.py"
+fi
+
+if [[ $commands == *" kernels/mamba"* ]]; then
+  commands="${commands} \
+  --ignore=kernels/mamba/test_mamba_mixer2.py \
+  --ignore=kernels/mamba/test_causal_conv1d.py \
+  --ignore=kernels/mamba/test_mamba_ssm_ssd.py"
+fi
+
+if [[ $commands == *" kernels/moe"* ]]; then
+  commands="${commands} \
+  --ignore=kernels/moe/test_moe.py \
+  --ignore=kernels/moe/test_cutlass_moe.py \
+  --ignore=kernels/moe/test_triton_moe_ptpc_fp8.py"
+fi
+
+# Ignore certain entrypoints/openai tests: flags are spliced in right after the matched path
+if [[ $commands == *" entrypoints/openai "* ]]; then
+  commands=${commands//" entrypoints/openai "/" entrypoints/openai \
+  --ignore=entrypoints/openai/test_audio.py \
+  --ignore=entrypoints/openai/test_shutdown.py \
+  --ignore=entrypoints/openai/test_completion.py \
+  --ignore=entrypoints/openai/test_sleep.py \
+  --ignore=entrypoints/openai/test_models.py \
+  --ignore=entrypoints/openai/test_lora_adapters.py \
+  --ignore=entrypoints/openai/test_return_tokens_as_ids.py \
+  --ignore=entrypoints/openai/test_root_path.py \
+  --ignore=entrypoints/openai/test_tokenization.py \
+  --ignore=entrypoints/openai/test_prompt_validation.py "}
+fi
+
+# Ignore certain entrypoints/llm tests: flags are spliced in right after the matched path
+if [[ $commands == *" entrypoints/llm "* ]]; then
+  commands=${commands//" entrypoints/llm "/" entrypoints/llm \
+  --ignore=entrypoints/llm/test_chat.py \
+  --ignore=entrypoints/llm/test_accuracy.py \
+  --ignore=entrypoints/llm/test_init.py \
+  --ignore=entrypoints/llm/test_prompt_validation.py "}
+fi
+
+# --ignore=entrypoints/openai/test_encoder_decoder.py \
+# --ignore=entrypoints/openai/test_embedding.py \
+# --ignore=entrypoints/openai/test_oot_registration.py
+# --ignore=entrypoints/openai/test_accuracy.py \
+# --ignore=entrypoints/openai/test_models.py <= Fails on MI250 but passes on MI300 as of 2025-03-13
+
+
+PARALLEL_JOB_COUNT=8
+MYPYTHONPATH=".."
+
+# Test that we're launching on the machine that has
+# proper access to GPUs
+render_gid=$(getent group render | cut -d: -f3)
+if [[ -z "$render_gid" ]]; then
+  echo "Error: 'render' group not found. This is required for GPU access." >&2
+  exit 1
+fi
+
+# If the command contains a shard flag, run all shards in parallel (PARALLEL_JOB_COUNT of them) because the host has 8 GPUs.
+if [[ $commands == *"--shard-id="* ]]; then
+  # Force --num-shards to PARALLEL_JOB_COUNT and collapse literal " \ " sequences into single spaces
+  commands=$(echo "$commands" | sed -E "s/--num-shards[[:blank:]]*=[[:blank:]]*[0-9]*/--num-shards=${PARALLEL_JOB_COUNT} /g" | sed 's/ \\ / /g')
+  for GPU in $(seq 0 $(($PARALLEL_JOB_COUNT-1))); do
+    # Rewrite --shard-id to this loop index so each container runs a different shard
+    commands_gpu=$(echo "$commands" | sed -E "s/--shard-id[[:blank:]]*=[[:blank:]]*[0-9]*/--shard-id=${GPU} /g" | sed 's/ \\ / /g')
+    echo "Shard ${GPU} commands:$commands_gpu"
+    echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES"  # NOTE(review): presumably extra --device flags; expanded unquoted below so it word-splits - confirm
+    docker run \
+        --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \
+        --network=host \
+        --shm-size=16gb \
+        --group-add "$render_gid" \
+        --rm \
+        -e HIP_VISIBLE_DEVICES="${GPU}" \
+        -e HF_TOKEN \
+        -e AWS_ACCESS_KEY_ID \
+        -e AWS_SECRET_ACCESS_KEY \
+        -v "${HF_CACHE}:${HF_MOUNT}" \
+        -e "HF_HOME=${HF_MOUNT}" \
+        -e "PYTHONPATH=${MYPYTHONPATH}" \
+        --name "${container_name}_${GPU}" \
+        "${image_name}" \
+        /bin/bash -c "${commands_gpu}" \
+        |& while read -r line; do echo ">>Shard $GPU: $line"; done &
+    PIDS+=($!)  # remember the background job's PID for the wait loop below
+  done
+  # Wait for every background shard job and record its exit status, in launch order
+  for pid in "${PIDS[@]}"; do
+    wait "${pid}"
+    STATUS+=($?)
+  done
+  for st in "${STATUS[@]}"; do  # exit with the first non-zero shard status, if any
+    if [[ ${st} -ne 0 ]]; then
+      echo "One of the processes failed with $st"
+      exit "${st}"
+    fi
+  done
+else  # no --shard-id flag: run the commands once, in a single container
+  echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES"
+  docker run \
+      --device /dev/kfd $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES \
+      --network=host \
+      --shm-size=16gb \
+      --group-add "$render_gid" \
+      --rm \
+      -e HF_TOKEN \
+      -e AWS_ACCESS_KEY_ID \
+      -e AWS_SECRET_ACCESS_KEY \
+      -v "${HF_CACHE}:${HF_MOUNT}" \
+      -e "HF_HOME=${HF_MOUNT}" \
+      -e "PYTHONPATH=${MYPYTHONPATH}" \
+      --name "${container_name}" \
+      "${image_name}" \
+      /bin/bash -c "${commands}"
+fi
diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 67741c5ece1..d781c2b4bd6 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1,16 +1,12 @@ steps: - - label: ":docker: Build image" - key: image-build - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "docker build -f docker/Dockerfile.rocm -t vllm-omni-rocm-ci ."
- - "docker tag vllm-omni-rocm-ci public.ecr.aws/q9t5s3a7/vllm-rocm-ci-test-repo:$BUILDKITE_COMMIT" - - "docker push public.ecr.aws/q9t5s3a7/vllm-rocm-ci-test-repo:$BUILDKITE_COMMIT" - agents: - queue: "cpu_queue_premerge_us_east_1" - - label: "Simple Unit Test" - commands: - - ".buildkite/scripts/simple_test.sh" - agents: - queue: "cpu_queue_premerge" +- label: "Diffusion Model Test" + timeout_in_minutes: 15 + agent_pool: mi325_2 + depends_on: image-build + mirror_hardwares: [amdexperimental, amdproduction, amdtentative] + grade: Blocking + source_file_dependencies: + - ./ + commands: + - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py \ No newline at end of file diff --git a/.buildkite/test-template-amd-omni.j2 b/.buildkite/test-template-amd-omni.j2 new file mode 100644 index 00000000000..8c44b8bc1e6 --- /dev/null +++ b/.buildkite/test-template-amd-omni.j2 @@ -0,0 +1,335 @@ +{# vllm-omni customized version + Based on: https://github.com/vllm-project/ci-infra/blob/main/buildkite/test-template-amd.j2 + Last synced: 2025-12-15 + Modifications: docker image name, default working dir, build queue, build commands for vllm-omni +#} +{% set cov_enabled = (cov_enabled == "1") %} +{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %} +{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-torch-nightly" %} +{% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cu118" %} +{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cpu" %} +{% if branch == "main" %} +{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT" %} +{% set docker_image_latest = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:latest" %} +{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-torch-nightly" %} +{% set docker_image_cu118 = 
"public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cu118" %} +{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %} +{% endif %} +{% set docker_image_amd = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-rocm-omni" %} +{% set default_working_dir = "/app/vllm-omni/tests" %} +{% set hf_home = "/root/.cache/huggingface" %} +{% set hf_home_efs = "/mnt/efs/hf_cache" %} +{% set hf_home_fsx = "/fsx/hf_cache" %} +{% set list_file_diff = list_file_diff | split("|") %} + +{# Intelligent test targeting: Detect when only test files changed and collect them #} +{%- set tests_acc = namespace(only_tests=true, any=false, changed=[]) %} +{%- for file in list_file_diff %} +{%- if file[:6] == 'tests/' and '/test_' in file and file[-3:] == '.py' %} +{%- set tests_acc.any = true %} +{%- set tests_acc.changed = tests_acc.changed + [file[6:]] %} +{%- else %} +{%- set tests_acc.only_tests = false %} +{%- endif %} +{%- endfor %} +{%- set tests_only = (tests_acc.only_tests and tests_acc.any) %} +{%- set changed_tests = tests_acc.changed %} + +{% macro add_pytest_coverage(cmd, coverage_file) %} +{% if "pytest " in cmd %} +COVERAGE_FILE={{ coverage_file }} {{ cmd | replace("pytest ", "pytest --cov=vllm --cov-report=xml --cov-append --durations=0 ") }} || true +{% else %} +{{ cmd }} +{% endif %} +{% endmacro %} + +{% macro add_docker_pytest_coverage(step, cov_enabled) %} +{# Compute coverage file id #} +{% set step_length = step.label | length %} +{% set step_first = step.label | first | default("x") %} +{% set coverage_file = ".coverage." 
+ step_length ~ "_" ~ step_first %} + +{# Intelligent test targeting: Build matched test targets for this step when only tests changed #} +{%- set match_ns = namespace(targets=[]) %} +{%- if tests_only and step.source_file_dependencies %} +{%- for dep in step.source_file_dependencies %} +{%- if dep[:6] == 'tests/' %} +{%- set dep_rel = dep[6:] %} +{# Handle deps that already end with '/' (e.g., tests/benchmarks/) #} +{%- if dep_rel[-1:] == '/' %} +{%- set dep_dir_prefix = dep_rel %} +{%- set dep_file_name = dep_rel[:-1] ~ '.py' %} +{%- else %} +{%- set dep_dir_prefix = dep_rel ~ '/' %} +{%- set dep_file_name = dep_rel ~ '.py' %} +{%- endif %} +{%- for t in changed_tests %} +{# Check if t starts with dep_dir_prefix (for directories) or equals dep_file_name (for files) #} +{%- set prefix_len = dep_dir_prefix | length %} +{%- set t_prefix = t[:prefix_len] %} +{%- set cond1 = (t | length >= prefix_len and t_prefix == dep_dir_prefix) %} +{%- set cond2 = (t == dep_file_name) %} +{%- if cond1 or cond2 %} +{%- set match_ns.targets = match_ns.targets + [t] %} +{%- endif %} +{%- endfor %} +{%- endif %} +{%- endfor %} +{%- endif %} +{%- set matched_targets = match_ns.targets %} + +{# If we have matched targets, run only those specific tests #} +{% if matched_targets | length > 0 %} +pytest -v -s {{ matched_targets | join(' ') }} +{% else %} +{# Default behavior: preserve original commands with optional coverage injection #} +{% if cov_enabled %} +{% set ns = namespace(has_pytest=false) %} +{% if step.command %} +{% if "pytest " in step.command %}{% set ns.has_pytest = true %}{% endif %} +{{ add_pytest_coverage(step.command, coverage_file) }} +{% else %} +{% for cmd in step.commands %} +{% if "pytest " in cmd %}{% set ns.has_pytest = true %}{% endif %} +{{ add_pytest_coverage(cmd, coverage_file) }}{{ " && " if not loop.last else "" }}{% endfor %} +{% endif %}{% if ns.has_pytest %} && curl -sSL https://raw.githubusercontent.com/vllm-project/ci-infra/{{ vllm_ci_branch | 
default('main') }}/buildkite/scripts/upload_codecov.sh | bash -s -- \"{{ step.label }}\"{% endif %} +{% else %} +{{ step.command or (step.commands | join(' && ')) | safe }} +{% endif %} +{% endif %} +{% endmacro %} + +{% macro render_cuda_config(step, image, default_working_dir, hf_home_fsx, hf_home, branch) %} +agents: + {% if step.label == "Documentation Build" %} + queue: small_cpu_queue_premerge + {% elif step.no_gpu %} + queue: cpu_queue_premerge_us_east_1 + {% elif step.gpu == "a100" %} + queue: a100_queue + {% elif step.gpu == "h100" %} + queue: mithril-h100-pool + {% elif step.gpu == "h200" %} + queue: skylab-h200 + {% elif step.gpu == "b200" %} + queue: B200 + {% elif step.num_gpus == 2 or step.num_gpus == 4 %} + queue: gpu_4_queue + {% else %} + queue: gpu_1_queue + {% endif %} + +{% if step.num_nodes >= 2 %} +commands: + - ./.buildkite/scripts/run-multi-node-test.sh {{ (step.working_dir or default_working_dir) | safe }} {{ step.num_nodes }} {{ step.num_gpus }} {{ image }} {% for command in step.commands %}"{{ (command | join(' && ')) | safe }}" {% endfor %} +{% endif %} + +{% if step.parallelism %} +parallelism: {{ step.parallelism }} +{% endif %} + +retry: + automatic: + - exit_status: -1 + limit: 1 + - exit_status: -10 + limit: 1 + +{% if step.num_nodes < 2 %} +plugins: + {% if step.gpu != "a100" and step.gpu != "h100" and step.gpu != "h200" and step.gpu != "b200" %} + - docker#v5.2.0: + image: {{ image }} + always-pull: true + propagate-environment: true + {% if not step.no_gpu %} + gpus: all + {% endif %} + {% if step.label == "Benchmarks" or step.mount_buildkite_agent or cov_enabled %} + mount-buildkite-agent: true + {% endif %} + command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"] + environment: + - VLLM_USAGE_SOURCE=ci-test + - 
NCCL_CUMEM_HOST_ENABLE=0 + - HF_HOME={{ hf_home_fsx }} + - HF_TOKEN + - CODECOV_TOKEN + {% if fail_fast == "true" %} + - PYTEST_ADDOPTS=-x + {% endif %} + {% if branch == "main" %} + - BUILDKITE_ANALYTICS_TOKEN + {% endif %} + {% if step.label == "Speculative decoding tests" %} + - VLLM_ATTENTION_BACKEND=XFORMERS + {% endif %} + volumes: + - /dev/shm:/dev/shm + - {{ hf_home_fsx }}:{{ hf_home_fsx }} + {% elif step.gpu == "h200" %} + - docker#v5.2.0: + image: {{ image }} + always-pull: true + propagate-environment: true + gpus: all + command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"] + environment: + - VLLM_USAGE_SOURCE=ci-test + - NCCL_CUMEM_HOST_ENABLE=0 + - HF_HOME=/benchmark-hf-cache + - HF_TOKEN + - CODECOV_TOKEN + {% if fail_fast == "true" %} + - PYTEST_ADDOPTS=-x + {% endif %} + {% if branch == "main" %} + - BUILDKITE_ANALYTICS_TOKEN + {% endif %} + volumes: + - /dev/shm:/dev/shm + - /data/benchmark-hf-cache:/benchmark-hf-cache + - /data/benchmark-vllm-cache:/root/.cache/vllm + {% elif step.gpu == "b200" %} + - docker#v5.2.0: + image: {{ image }} + always-pull: true + propagate-environment: true + # gpus will be configured by BUILDKITE_PLUGIN_DOCKER_GPUS in per host environment variable. 
+ # gpus: all + command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"] + environment: + - VLLM_USAGE_SOURCE=ci-test + - NCCL_CUMEM_HOST_ENABLE=0 + - HF_HOME=/benchmark-hf-cache + - HF_TOKEN + - CODECOV_TOKEN + {% if fail_fast == "true" %} + - PYTEST_ADDOPTS=-x + {% endif %} + {% if branch == "main" %} + - BUILDKITE_ANALYTICS_TOKEN + {% endif %} + volumes: + - /dev/shm:/dev/shm + - /data/benchmark-hf-cache:/benchmark-hf-cache + - /data/benchmark-vllm-cache:/root/.cache/vllm + {% elif step.gpu == "h100" %} + - kubernetes: + podSpec: + containers: + - image: {{ image }} + command: + - bash -c "{{ '(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd ' ~ ((step.working_dir or default_working_dir) | safe) ~ ' && ' ~ (step.command or (step.commands | join(" && ")) | safe) }}" + resources: + limits: + nvidia.com/gpu: {{ step.num_gpus or 1 }} + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: {{ hf_home }} + env: + - name: VLLM_USAGE_SOURCE + value: ci-test + - name: NCCL_CUMEM_HOST_ENABLE + value: "0" + - name: HF_HOME + value: {{ hf_home }} + nodeSelector: + nvidia.com/gpu.product: NVIDIA-H100-80GB-HBM3 + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: /mnt/hf-cache + type: Directory + {% else %} + - kubernetes: + podSpec: + priorityClassName: ci + containers: + - image: {{ image }} + command: + - bash -c "{{ '(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd ' ~ ((step.working_dir or default_working_dir) | safe) ~ ' && ' ~ (step.command or (step.commands | join(" && ")) | safe) }}" + resources: + limits: + nvidia.com/gpu: {{ step.num_gpus or 1 }} + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache 
+ mountPath: {{ hf_home }} + env: + - name: VLLM_USAGE_SOURCE + value: ci-test + - name: NCCL_CUMEM_HOST_ENABLE + value: "0" + - name: HF_HOME + value: {{ hf_home }} + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: {{ hf_home }} + type: Directory + {% endif %} +{% endif %} +{% endmacro %} + + - group: "AMD Tests" + depends_on: ~ + steps: + - label: "AMD: :docker: build image" + depends_on: ~ + soft_fail: false + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker build -f docker/Dockerfile.rocm -t {{ docker_image_amd }} --progress plain ." + - "docker push {{ docker_image_amd }}" + key: "amd-build" + env: + DOCKER_BUILDKIT: "1" + retry: + automatic: + - exit_status: -1 # Agent was lost + limit: 1 + - exit_status: -10 # Agent was lost + limit: 1 + - exit_status: 1 # Machine occasionally fail + limit: 1 + agents: + queue: cpu_queue_premerge_us_east_1 + + {% for step in steps %} + {% if step.mirror_hardwares and mirror_hw in step.mirror_hardwares %} + - label: "{{ step.agent_pool }}: {{ step.label }}" + depends_on: amd-build + agents: + {% if step.agent_pool %} + queue: amd_{{ step.agent_pool }} + {% else %} + queue: amd_mi325_1 + {% endif %} + command: bash .buildkite/scripts/hardware_ci/run-amd-test.sh "(command rocm-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" + env: + DOCKER_BUILDKIT: "1" + priority: 100 + {% if step.grade and step.grade == "Blocking" %} + soft_fail: false + {% else %} + soft_fail: true + {% endif%} + {% endif %} + {% endfor %} From 89987f3ab5735c03123fecbc18505f7508054c8e Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Tue, 
16 Dec 2025 13:29:44 +0000 Subject: [PATCH 12/32] add change the working directory of vllm omni docker image in CI; add is rocm to handle unit tests that are failing Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 4 ++-- .buildkite/test-template-amd-omni.j2 | 2 +- tests/e2e/__init__.py | 0 tests/e2e/offline_inference/test_t2i_model.py | 6 ++++++ vllm_omni/utils/platform_utils.py | 6 ++++++ 5 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 tests/e2e/__init__.py diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index d781c2b4bd6..9d891d1ff9e 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -4,9 +4,9 @@ steps: timeout_in_minutes: 15 agent_pool: mi325_2 depends_on: image-build - mirror_hardwares: [amdexperimental, amdproduction, amdtentative] + mirror_hardwares: [amdexperimental, amdtentative] grade: Blocking source_file_dependencies: - ./ commands: - - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py \ No newline at end of file + - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py diff --git a/.buildkite/test-template-amd-omni.j2 b/.buildkite/test-template-amd-omni.j2 index 8c44b8bc1e6..47a207580a4 100644 --- a/.buildkite/test-template-amd-omni.j2 +++ b/.buildkite/test-template-amd-omni.j2 @@ -16,7 +16,7 @@ {% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %} {% endif %} {% set docker_image_amd = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-rocm-omni" %} -{% set default_working_dir = "/app/vllm-omni/tests" %} +{% set default_working_dir = "/app/vllm-omni" %} {% set hf_home = "/root/.cache/huggingface" %} {% set hf_home_efs = "/mnt/efs/hf_cache" %} {% set hf_home_fsx = "/fsx/hf_cache" %} diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/e2e/offline_inference/test_t2i_model.py b/tests/e2e/offline_inference/test_t2i_model.py index 
77097836622..1cac2dfddf9 100644 --- a/tests/e2e/offline_inference/test_t2i_model.py +++ b/tests/e2e/offline_inference/test_t2i_model.py @@ -11,11 +11,17 @@ sys.path.insert(0, str(REPO_ROOT)) from vllm_omni import Omni +from vllm_omni.utils.platform_utils import is_rocm os.environ["VLLM_TEST_CLEAN_GPU_MEMORY"] = "1" models = ["Tongyi-MAI/Z-Image-Turbo", "riverclouds/qwen_image_random"] +if is_rocm(): + # vLLM V0.11.0 has issue running + # riverclouds/qwen_image_random on ROCm + models = ["Tongyi-MAI/Z-Image-Turbo"] + @pytest.mark.parametrize("model_name", models) def test_diffusion_model(model_name: str): diff --git a/vllm_omni/utils/platform_utils.py b/vllm_omni/utils/platform_utils.py index 385b1a8f36c..679471415f0 100644 --- a/vllm_omni/utils/platform_utils.py +++ b/vllm_omni/utils/platform_utils.py @@ -6,6 +6,8 @@ def detect_device_type() -> str: device_type = getattr(current_platform, "device_type", None) + if current_platform.is_rocm(): + return "rocm" if isinstance(device_type, str) and device_type: return device_type.lower() if torch.cuda.is_available(): @@ -19,6 +21,10 @@ def is_npu() -> bool: return detect_device_type() == "npu" +def is_rocm() -> bool: + return detect_device_type() == "rocm" + + def get_device_control_env_var() -> str: """Return the environment variable name for device visibility control.""" if hasattr(current_platform, "device_control_env_var"): From 19c3056404eda321244d1ee04730126b14141359 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Tue, 16 Dec 2025 14:41:21 +0000 Subject: [PATCH 13/32] fix test path; add qwen25 omni Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 16 ++- .../stage_configs/rocm/qwen2_5_omni_ci.yaml | 105 ++++++++++++++++++ .../offline_inference/test_qwen2_5_omni.py | 9 +- tests/e2e/offline_inference/utils.py | 1 + 4 files changed, 127 insertions(+), 4 deletions(-) create mode 100644 tests/e2e/offline_inference/stage_configs/rocm/qwen2_5_omni_ci.yaml diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml 
index 9d891d1ff9e..c0a4bd16e60 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -3,10 +3,22 @@ steps: - label: "Diffusion Model Test" timeout_in_minutes: 15 agent_pool: mi325_2 - depends_on: image-build - mirror_hardwares: [amdexperimental, amdtentative] + depends_on: amd-build + mirror_hardwares: [amdexperimental, amdproduction, amdtentative] grade: Blocking source_file_dependencies: - ./ commands: - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py + +- label: "Omni Model Test" + timeout_in_minutes: 15 + agent_pool: mi325_2 + depends_on: amd-build + mirror_hardwares: [amdexperimental, amdproduction, amdtentative] + source_file_dependencies: + - ./ + commands: + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py diff --git a/tests/e2e/offline_inference/stage_configs/rocm/qwen2_5_omni_ci.yaml b/tests/e2e/offline_inference/stage_configs/rocm/qwen2_5_omni_ci.yaml new file mode 100644 index 00000000000..96e9d7fa725 --- /dev/null +++ b/tests/e2e/offline_inference/stage_configs/rocm/qwen2_5_omni_ci.yaml @@ -0,0 +1,105 @@ +# stage config for running qwen2.5-omni with architecture of OmniLLM. + +# The following config has been verified on 2x 24GB GPU (L4/RTX3090/RTX4090). +# This config is optimized for CI e2e tests. 
+stage_args: + - stage_id: 0 + runtime: + process: true # Run this stage in a separate process + devices: "0" # Visible devices for this stage (CUDA_VISIBLE_DEVICES/torch.cuda.set_device) + max_batch_size: 1 + engine_args: + model_stage: thinker + model_arch: Qwen2_5OmniForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker + scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler + max_model_len: 896 + max_num_batched_tokens: 896 + max_num_seqs: 1 + gpu_memory_utilization: 0.8 + skip_mm_profiling: true + enforce_eager: true # Now we only support eager mode + trust_remote_code: true + engine_output_type: latent + enable_prefix_caching: false + is_comprehension: true + final_output: true + final_output_type: text + default_sampling_params: + temperature: 0.0 + top_p: 1.0 + top_k: -1 + max_tokens: 128 + seed: 42 + detokenize: True + repetition_penalty: 1.1 + - stage_id: 1 + runtime: + process: true + devices: "1" + max_batch_size: 1 + engine_args: + model_stage: talker + model_arch: Qwen2_5OmniForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker + scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler + max_model_len: 896 + max_num_batched_tokens: 896 + max_num_seqs: 1 + gpu_memory_utilization: 0.8 + skip_mm_profiling: true + enforce_eager: true + trust_remote_code: true + enable_prefix_caching: false + engine_output_type: latent + engine_input_source: [0] + custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen2_5_omni.thinker2talker + default_sampling_params: + temperature: 0.9 + top_p: 0.8 + top_k: 40 + max_tokens: 128 + seed: 42 + detokenize: True + repetition_penalty: 1.05 + stop_token_ids: [8294] + - stage_id: 2 + runtime: + process: true + devices: "0" # Example: use a different GPU than the previous stage; use "0" if single GPU + max_batch_size: 1 + engine_args: + model_stage: code2wav + model_arch: Qwen2_5OmniForConditionalGeneration + worker_cls: 
vllm_omni.worker.gpu_generation_worker.GPUGenerationWorker + scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler + gpu_memory_utilization: 0.15 + enforce_eager: true + trust_remote_code: true + enable_prefix_caching: false + engine_output_type: audio + engine_input_source: [1] + final_output: true + final_output_type: audio + default_sampling_params: + temperature: 0.0 + top_p: 1.0 + top_k: -1 + max_tokens: 128 + seed: 42 + detokenize: True + repetition_penalty: 1.1 + +# Top-level runtime config (concise): default windows and stage edges +runtime: + enabled: true + defaults: + window_size: -1 # Simplified: trigger downstream only after full upstream completion + max_inflight: 1 # Simplified: process serially within each stage + edges: + - from: 0 # thinker → talker: trigger only after receiving full input (-1) + to: 1 + window_size: -1 + - from: 1 # talker → code2wav: trigger only after receiving full input (-1) + to: 2 + window_size: -1 diff --git a/tests/e2e/offline_inference/test_qwen2_5_omni.py b/tests/e2e/offline_inference/test_qwen2_5_omni.py index 72478cfd40f..45e8449284f 100644 --- a/tests/e2e/offline_inference/test_qwen2_5_omni.py +++ b/tests/e2e/offline_inference/test_qwen2_5_omni.py @@ -12,24 +12,29 @@ from vllm.assets.video import VideoAsset from vllm.multimodal.image import convert_image_mode +from vllm_omni.utils.platform_utils import detect_device_type + from .conftest import OmniRunner -from .utils import create_new_process_for_each_test models = ["Qwen/Qwen2.5-Omni-3B"] # CI stage config optimized for 24GB GPU (L4/RTX3090) stage_configs = [str(Path(__file__).parent / "stage_configs" / "qwen2_5_omni_ci.yaml")] +if detect_device_type() != "cuda": + # ROCm stage config optimized for MI325 GPU + stage_configs = [str(Path(__file__).parent / "stage_configs" / detect_device_type() / "qwen2_5_omni_ci.yaml")] + # Create parameter combinations for model and stage config test_params = [(model, stage_config) for model in 
models for stage_config in stage_configs] @pytest.mark.core_model @pytest.mark.parametrize("test_config", test_params) -@create_new_process_for_each_test() def test_mixed_modalities_to_audio(omni_runner: type[OmniRunner], test_config: tuple[str, str]) -> None: """Test processing audio, image, and video together, generating audio output.""" model, stage_config_path = test_config + print(f"Running test for model: {model} and stage config: {stage_config_path}") with omni_runner(model, seed=42, stage_configs_path=stage_config_path) as runner: # Prepare multimodal inputs question = "What is recited in the audio? What is in this image? Describe the video briefly." diff --git a/tests/e2e/offline_inference/utils.py b/tests/e2e/offline_inference/utils.py index 931e7b506cb..82c46ff55dd 100644 --- a/tests/e2e/offline_inference/utils.py +++ b/tests/e2e/offline_inference/utils.py @@ -20,6 +20,7 @@ VLLM_PATH = Path(__file__).parent.parent.parent """Path to root of the vLLM repository.""" +print(f"VLLM_PATH: {VLLM_PATH}") _P = ParamSpec("_P") From 8ae3569f597ca1ca7e159e4c6800e01af531e684 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Tue, 16 Dec 2025 15:36:59 +0000 Subject: [PATCH 14/32] add necessary env flag for mi325 vllm 0.11.0 Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index c0a4bd16e60..59a6e1f3a27 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -9,6 +9,11 @@ steps: source_file_dependencies: - ./ commands: + - export MIOPEN_FIND_MODE=FAST + - export VLLM_ROCM_USE_AITER=1 + - export VLLM_ROCM_USE_AITER_MHA=1 + - export VLLM_ROCM_USE_AITER_LINEAR=0 + - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py - label: "Omni Model Test" @@ -21,4 +26,9 @@ steps: commands: - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - export MIOPEN_FIND_MODE=FAST + - export 
VLLM_ROCM_USE_AITER=1 + - export VLLM_ROCM_USE_AITER_MHA=1 + - export VLLM_ROCM_USE_AITER_LINEAR=0 + - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py From 442dc4483aa4d950e4811f9797c806a53e2cb257 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Tue, 16 Dec 2025 16:23:06 +0000 Subject: [PATCH 15/32] fix get device; add qwen3-omni unit tests Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 19 +++- docker/Dockerfile.rocm | 1 + .../stage_configs/rocm/qwen3_omni_ci.yaml | 98 +++++++++++++++++ .../offline_inference/test_qwen2_5_omni.py | 6 +- .../e2e/offline_inference/test_qwen3_omni.py | 6 ++ vllm_omni/entrypoints/utils.py | 6 +- .../stage_configs/rocm/qwen2_5_omni.yaml | 102 ++++++++++++++++++ .../stage_configs/rocm/qwen3_omni_moe.yaml | 97 +++++++++++++++++ vllm_omni/utils/__init__.py | 2 + vllm_omni/utils/platform_utils.py | 4 +- 10 files changed, 332 insertions(+), 9 deletions(-) create mode 100644 tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml create mode 100644 vllm_omni/model_executor/stage_configs/rocm/qwen2_5_omni.yaml create mode 100644 vllm_omni/model_executor/stage_configs/rocm/qwen3_omni_moe.yaml diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 59a6e1f3a27..ab66b4d61a5 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -16,7 +16,7 @@ steps: - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py -- label: "Omni Model Test" +- label: "Omni Model Test Qwen2-5-Omni" timeout_in_minutes: 15 agent_pool: mi325_2 depends_on: amd-build @@ -32,3 +32,20 @@ steps: - export VLLM_ROCM_USE_AITER_LINEAR=0 - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py + +- label: "Omni Model Test Qwen3-Omni" + timeout_in_minutes: 15 + agent_pool: mi325_2 + depends_on: amd-build + mirror_hardwares: [amdexperimental, amdproduction, amdtentative] + source_file_dependencies: + - 
./ + commands: + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - export MIOPEN_FIND_MODE=FAST + - export VLLM_ROCM_USE_AITER=1 + - export VLLM_ROCM_USE_AITER_MHA=1 + - export VLLM_ROCM_USE_AITER_LINEAR=0 + - export VLLM_ROCM_USE_AITER_RMSNORM=0 + - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 872fb0c049a..849539e3a90 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -31,6 +31,7 @@ COPY . ${COMMON_WORKDIR}/vllm-omni RUN cd ${COMMON_WORKDIR}/vllm-omni && python3 -m pip install --no-cache-dir ".[dev]" # Create python symlink +RUN export GPU_ARCHS="gfx942;gfx950" RUN ln -sf /usr/bin/python3 /usr/bin/python ENTRYPOINT [] diff --git a/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml b/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml new file mode 100644 index 00000000000..e9f87be387d --- /dev/null +++ b/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml @@ -0,0 +1,98 @@ +# Stage config for running Qwen3-Omni-MoE with 3-stage architecture +# Stage 0: Thinker (multimodal understanding + text generation) +# Stage 1: Talker (text embeddings → 16-layer RVQ codec codes) +# Stage 2: Code2Wav (8-layer RVQ codes → audio waveform) + +# The following config has been verified on 2x H100-80G GPUs. 
+stage_args: + - stage_id: 0 + runtime: + devices: "0,1" + max_batch_size: 1 + engine_args: + model_stage: thinker + model_arch: Qwen3OmniMoeForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker + scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler + gpu_memory_utilization: 0.6 + enforce_eager: true + trust_remote_code: true + engine_output_type: latent # Output hidden states for talker + distributed_executor_backend: "mp" + enable_prefix_caching: false + hf_config_name: thinker_config + tensor_parallel_size: 2 + load_format: dummy + final_output: true + final_output_type: text + is_comprehension: true + default_sampling_params: + temperature: 0.4 + top_p: 0.9 + top_k: 1 + max_tokens: 100 + seed: 42 + detokenize: True + repetition_penalty: 1.05 + + - stage_id: 1 + runtime: + devices: "1" + max_batch_size: 1 + engine_args: + model_stage: talker + model_arch: Qwen3OmniMoeForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker + scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler + gpu_memory_utilization: 0.5 + enforce_eager: true + trust_remote_code: true + engine_output_type: latent # Output codec codes for code2wav + # tensor_parallel_size: 2 + enable_prefix_caching: false + distributed_executor_backend: "mp" + hf_config_name: talker_config + load_format: dummy + engine_input_source: [0] + custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_omni.thinker2talker + # final_output: true + # final_output_type: text + default_sampling_params: + temperature: 0.9 + top_k: 50 + max_tokens: 100 + seed: 42 + detokenize: False + repetition_penalty: 1.05 + stop_token_ids: [2150] + + - stage_id: 2 + runtime: + devices: "0" + max_batch_size: 1 + engine_args: + model_stage: code2wav + model_arch: Qwen3OmniMoeForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_generation_worker.GPUGenerationWorker + scheduler_cls: 
vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler + enforce_eager: true + trust_remote_code: true + enable_prefix_caching: false + engine_output_type: audio # Final output: audio waveform + gpu_memory_utilization: 0.1 + distributed_executor_backend: "mp" + max_num_batched_tokens: 1000000 + hf_config_name: thinker_config + load_format: dummy + engine_input_source: [1] + custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_omni.talker2code2wav + final_output: true + final_output_type: audio + default_sampling_params: + temperature: 0.0 + top_p: 1.0 + top_k: -1 + max_tokens: 200 + seed: 42 + detokenize: True + repetition_penalty: 1.1 diff --git a/tests/e2e/offline_inference/test_qwen2_5_omni.py b/tests/e2e/offline_inference/test_qwen2_5_omni.py index 45e8449284f..f0ac1723160 100644 --- a/tests/e2e/offline_inference/test_qwen2_5_omni.py +++ b/tests/e2e/offline_inference/test_qwen2_5_omni.py @@ -12,7 +12,7 @@ from vllm.assets.video import VideoAsset from vllm.multimodal.image import convert_image_mode -from vllm_omni.utils.platform_utils import detect_device_type +from vllm_omni.utils.platform_utils import is_rocm from .conftest import OmniRunner @@ -21,9 +21,9 @@ # CI stage config optimized for 24GB GPU (L4/RTX3090) stage_configs = [str(Path(__file__).parent / "stage_configs" / "qwen2_5_omni_ci.yaml")] -if detect_device_type() != "cuda": +if is_rocm(): # ROCm stage config optimized for MI325 GPU - stage_configs = [str(Path(__file__).parent / "stage_configs" / detect_device_type() / "qwen2_5_omni_ci.yaml")] + stage_configs = [str(Path(__file__).parent / "stage_configs" / "rocm" / "qwen2_5_omni_ci.yaml")] # Create parameter combinations for model and stage config test_params = [(model, stage_config) for model in models for stage_config in stage_configs] diff --git a/tests/e2e/offline_inference/test_qwen3_omni.py b/tests/e2e/offline_inference/test_qwen3_omni.py index b43fa836174..ec1fc4f15ac 100644 --- 
a/tests/e2e/offline_inference/test_qwen3_omni.py +++ b/tests/e2e/offline_inference/test_qwen3_omni.py @@ -10,6 +10,8 @@ import pytest from vllm.assets.video import VideoAsset +from vllm_omni.utils.platform_utils import is_rocm + from .conftest import OmniRunner os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" @@ -19,6 +21,10 @@ # CI stage config for 2xH100-80G GPUs stage_configs = [str(Path(__file__).parent / "stage_configs" / "qwen3_omni_ci.yaml")] +if is_rocm(): + # ROCm stage config optimized for MI325 GPU + stage_configs = [str(Path(__file__).parent / "stage_configs" / "rocm" / "qwen2_5_omni_ci.yaml")] + # Create parameter combinations for model and stage config test_params = [(model, stage_config) for model in models for stage_config in stage_configs] diff --git a/vllm_omni/entrypoints/utils.py b/vllm_omni/entrypoints/utils.py index e092325f8f1..5256abc3a3e 100644 --- a/vllm_omni/entrypoints/utils.py +++ b/vllm_omni/entrypoints/utils.py @@ -7,7 +7,7 @@ from omegaconf import OmegaConf from vllm.transformers_utils.config import get_config -from vllm_omni.utils import detect_device_type +from vllm_omni.utils import detect_device_type, is_rocm # Get the project root directory (2 levels up from this file) PROJECT_ROOT = Path(__file__).parent.parent.parent @@ -83,8 +83,10 @@ def resolve_model_config_path(model: str) -> str: device_type = detect_device_type() # Try device-specific config first - if device_type != "cuda": + if device_type != "cuda" or is_rocm(): device_config_file = f"vllm_omni/model_executor/stage_configs/{device_type}/{model_type}.yaml" + if is_rocm(): + device_config_file = f"vllm_omni/model_executor/stage_configs/rocm/{model_type}.yaml" device_config_path = PROJECT_ROOT / device_config_file if os.path.exists(device_config_path): return str(device_config_path) diff --git a/vllm_omni/model_executor/stage_configs/rocm/qwen2_5_omni.yaml b/vllm_omni/model_executor/stage_configs/rocm/qwen2_5_omni.yaml new file mode 100644 index 
00000000000..c646aa76a9d --- /dev/null +++ b/vllm_omni/model_executor/stage_configs/rocm/qwen2_5_omni.yaml @@ -0,0 +1,102 @@ +# stage config for running qwen2.5-omni with architecture of OmniLLM. + +# The following config has been verified on 2x H100-80G GPU. +stage_args: + - stage_id: 0 + runtime: + process: true # Run this stage in a separate process + devices: "0" # Visible devices for this stage (CUDA_VISIBLE_DEVICES/torch.cuda.set_device) + max_batch_size: 1 + engine_args: + model_stage: thinker + model_arch: Qwen2_5OmniForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker + scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler + gpu_memory_utilization: 0.8 + enforce_eager: true # Now we only support eager mode + trust_remote_code: true + engine_output_type: latent + enable_prefix_caching: false + max_num_batched_tokens: 32768 + is_comprehension: true + final_output: true + final_output_type: text + default_sampling_params: + temperature: 0.0 + top_p: 1.0 + top_k: -1 + max_tokens: 2048 + seed: 42 + detokenize: True + repetition_penalty: 1.1 + + - stage_id: 1 + runtime: + process: true + devices: "1" + max_batch_size: 1 + engine_args: + model_stage: talker + model_arch: Qwen2_5OmniForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker + scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler + gpu_memory_utilization: 0.8 + enforce_eager: true + trust_remote_code: true + enable_prefix_caching: false + max_num_batched_tokens: 32768 + engine_output_type: latent + engine_input_source: [0] + custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen2_5_omni.thinker2talker + default_sampling_params: + temperature: 0.9 + top_p: 0.8 + top_k: 40 + max_tokens: 2048 + seed: 42 + detokenize: True + repetition_penalty: 1.05 + stop_token_ids: [8294] + + - stage_id: 2 + runtime: + process: true + devices: "2" # Example: use a different GPU than the previous stage; use "0" 
if single GPU + max_batch_size: 1 + engine_args: + model_stage: code2wav + model_arch: Qwen2_5OmniForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_generation_worker.GPUGenerationWorker + scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler + gpu_memory_utilization: 0.15 + enforce_eager: true + trust_remote_code: true + enable_prefix_caching: false + max_num_batched_tokens: 32768 + engine_output_type: audio + engine_input_source: [1] + final_output: true + final_output_type: audio + default_sampling_params: + temperature: 0.0 + top_p: 1.0 + top_k: -1 + max_tokens: 2048 + seed: 42 + detokenize: True + repetition_penalty: 1.1 + +# Top-level runtime config (concise): default windows and stage edges +runtime: + enabled: true + defaults: + window_size: -1 # Simplified: trigger downstream only after full upstream completion + max_inflight: 1 # Simplified: process serially within each stage + + edges: + - from: 0 # thinker → talker: trigger only after receiving full input (-1) + to: 1 + window_size: -1 + - from: 1 # talker → code2wav: trigger only after receiving full input (-1) + to: 2 + window_size: -1 diff --git a/vllm_omni/model_executor/stage_configs/rocm/qwen3_omni_moe.yaml b/vllm_omni/model_executor/stage_configs/rocm/qwen3_omni_moe.yaml new file mode 100644 index 00000000000..73f65ecb557 --- /dev/null +++ b/vllm_omni/model_executor/stage_configs/rocm/qwen3_omni_moe.yaml @@ -0,0 +1,97 @@ +# Stage config for running Qwen3-Omni-MoE with 3-stage architecture +# Stage 0: Thinker (multimodal understanding + text generation) +# Stage 1: Talker (text embeddings → 8-layer RVQ codec codes) +# Stage 2: Code2Wav (8-layer RVQ codes → audio waveform) + +# The following config has been verified on 2x H100-80G GPUs. 
+stage_args: + - stage_id: 0 + runtime: + devices: "0,1" + max_batch_size: 1 + engine_args: + model_stage: thinker + model_arch: Qwen3OmniMoeForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker + scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler + gpu_memory_utilization: 0.6 + enforce_eager: true + trust_remote_code: true + engine_output_type: latent # Output hidden states for talker + distributed_executor_backend: "mp" + enable_prefix_caching: false + max_num_batched_tokens: 32768 + hf_config_name: thinker_config + tensor_parallel_size: 2 + final_output: true + final_output_type: text + is_comprehension: true + default_sampling_params: + temperature: 0.4 + top_p: 0.9 + top_k: 1 + max_tokens: 2048 + seed: 42 + detokenize: True + repetition_penalty: 1.05 + + - stage_id: 1 + runtime: + devices: "1" + max_batch_size: 1 + engine_args: + model_stage: talker + model_arch: Qwen3OmniMoeForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker + scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler + gpu_memory_utilization: 0.3 + enforce_eager: true + trust_remote_code: true + engine_output_type: latent # Output codec codes for code2wav + # tensor_parallel_size: 2 + enable_prefix_caching: false + max_num_batched_tokens: 32768 + distributed_executor_backend: "mp" + hf_config_name: talker_config + engine_input_source: [0] + custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_omni.thinker2talker + # final_output: true + # final_output_type: text + default_sampling_params: + temperature: 0.9 + top_k: 50 + max_tokens: 4096 + seed: 42 + detokenize: False + repetition_penalty: 1.05 + stop_token_ids: [2150] + + - stage_id: 2 + runtime: + devices: "0" + max_batch_size: 1 + engine_args: + model_stage: code2wav + model_arch: Qwen3OmniMoeForConditionalGeneration + worker_cls: vllm_omni.worker.gpu_generation_worker.GPUGenerationWorker + scheduler_cls: 
vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler + enforce_eager: true + trust_remote_code: true + enable_prefix_caching: false + engine_output_type: audio # Final output: audio waveform + gpu_memory_utilization: 0.1 + distributed_executor_backend: "mp" + max_num_batched_tokens: 1000000 + hf_config_name: thinker_config + engine_input_source: [1] + custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_omni.talker2code2wav + final_output: true + final_output_type: audio + default_sampling_params: + temperature: 0.0 + top_p: 1.0 + top_k: -1 + max_tokens: 65536 + seed: 42 + detokenize: True + repetition_penalty: 1.1 diff --git a/vllm_omni/utils/__init__.py b/vllm_omni/utils/__init__.py index 50dbb478d90..34b2545db59 100644 --- a/vllm_omni/utils/__init__.py +++ b/vllm_omni/utils/__init__.py @@ -2,10 +2,12 @@ detect_device_type, get_device_control_env_var, is_npu, + is_rocm, ) __all__ = [ "detect_device_type", "get_device_control_env_var", "is_npu", + "is_rocm", ] diff --git a/vllm_omni/utils/platform_utils.py b/vllm_omni/utils/platform_utils.py index 679471415f0..5f8259ab83d 100644 --- a/vllm_omni/utils/platform_utils.py +++ b/vllm_omni/utils/platform_utils.py @@ -6,8 +6,6 @@ def detect_device_type() -> str: device_type = getattr(current_platform, "device_type", None) - if current_platform.is_rocm(): - return "rocm" if isinstance(device_type, str) and device_type: return device_type.lower() if torch.cuda.is_available(): @@ -22,7 +20,7 @@ def is_npu() -> bool: def is_rocm() -> bool: - return detect_device_type() == "rocm" + return current_platform.is_rocm() def get_device_control_env_var() -> str: From 3532ec7da180206be99925b347937664592662df Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Tue, 16 Dec 2025 16:31:22 +0000 Subject: [PATCH 16/32] fix the file pointed by qwen3 omni test Signed-off-by: tjtanaa --- tests/e2e/offline_inference/test_qwen3_omni.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/e2e/offline_inference/test_qwen3_omni.py b/tests/e2e/offline_inference/test_qwen3_omni.py index ec1fc4f15ac..9fa07839f0e 100644 --- a/tests/e2e/offline_inference/test_qwen3_omni.py +++ b/tests/e2e/offline_inference/test_qwen3_omni.py @@ -23,7 +23,7 @@ if is_rocm(): # ROCm stage config optimized for MI325 GPU - stage_configs = [str(Path(__file__).parent / "stage_configs" / "rocm" / "qwen2_5_omni_ci.yaml")] + stage_configs = [str(Path(__file__).parent / "stage_configs" / "rocm" / "qwen3_omni_ci.yaml")] # Create parameter combinations for model and stage config test_params = [(model, stage_config) for model in models for stage_config in stage_configs] From 056fe9afb1ae1fe00220edd4ce3ba0ea20dcfd0a Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 17 Dec 2025 04:17:27 +0000 Subject: [PATCH 17/32] trying to fix aiter mi325x arch auto detection issue Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 21 +++++++++++++++++++++ docker/Dockerfile.rocm | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index ab66b4d61a5..fc45004f91e 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -9,6 +9,7 @@ steps: source_file_dependencies: - ./ commands: + - export GPU_ARCHS=gfx942 - export MIOPEN_FIND_MODE=FAST - export VLLM_ROCM_USE_AITER=1 - export VLLM_ROCM_USE_AITER_MHA=1 @@ -16,6 +17,24 @@ steps: - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py +- label: "Diffusion Cache Backend Test" + timeout_in_minutes: 15 + agent_pool: mi325_1 + depends_on: amd-build + mirror_hardwares: [amdexperimental, amdproduction, amdtentative] + source_file_dependencies: + - ./ + commands: + - export GPU_ARCHS=gfx942 + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - export MIOPEN_FIND_MODE=FAST + - export VLLM_ROCM_USE_AITER=1 + - export VLLM_ROCM_USE_AITER_MHA=1 + - export VLLM_ROCM_USE_AITER_LINEAR=0 + - export 
VLLM_ROCM_USE_AITER_RMSNORM=0 + - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py + - label: "Omni Model Test Qwen2-5-Omni" timeout_in_minutes: 15 agent_pool: mi325_2 @@ -24,6 +43,7 @@ steps: source_file_dependencies: - ./ commands: + - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export MIOPEN_FIND_MODE=FAST @@ -41,6 +61,7 @@ steps: source_file_dependencies: - ./ commands: + - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - export MIOPEN_FIND_MODE=FAST diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 849539e3a90..c8093330901 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -31,7 +31,7 @@ COPY . ${COMMON_WORKDIR}/vllm-omni RUN cd ${COMMON_WORKDIR}/vllm-omni && python3 -m pip install --no-cache-dir ".[dev]" # Create python symlink -RUN export GPU_ARCHS="gfx942;gfx950" +ENV GPU_ARCHS="gfx942;gfx950" RUN ln -sf /usr/bin/python3 /usr/bin/python ENTRYPOINT [] From 7c689e3668143d3e4d7d077b0345fd2788298a07 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 17 Dec 2025 04:46:59 +0000 Subject: [PATCH 18/32] fix the rocm qwen3 omni unit test Signed-off-by: tjtanaa --- .../offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml b/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml index e9f87be387d..fb955f72478 100644 --- a/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml +++ b/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml @@ -14,7 +14,7 @@ stage_args: model_arch: Qwen3OmniMoeForConditionalGeneration worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler - gpu_memory_utilization: 0.6 + gpu_memory_utilization: 0.4 
enforce_eager: true trust_remote_code: true engine_output_type: latent # Output hidden states for talker @@ -44,7 +44,7 @@ stage_args: model_arch: Qwen3OmniMoeForConditionalGeneration worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler - gpu_memory_utilization: 0.5 + gpu_memory_utilization: 0.4 enforce_eager: true trust_remote_code: true engine_output_type: latent # Output codec codes for code2wav From ce45b1fac1fe8ff4244ad970633a1345a915a82b Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 17 Dec 2025 14:17:44 +0000 Subject: [PATCH 19/32] remove qwen3 unit tests first; reuse AITER_ROCM_ARCH from base image Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 18 ------------------ docker/Dockerfile.rocm | 7 ++++++- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index fc45004f91e..fb84b57c74e 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -52,21 +52,3 @@ steps: - export VLLM_ROCM_USE_AITER_LINEAR=0 - export VLLM_ROCM_USE_AITER_RMSNORM=0 - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py - -- label: "Omni Model Test Qwen3-Omni" - timeout_in_minutes: 15 - agent_pool: mi325_2 - depends_on: amd-build - mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - source_file_dependencies: - - ./ - commands: - - export GPU_ARCHS=gfx942 - - export VLLM_LOGGING_LEVEL=DEBUG - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_FIND_MODE=FAST - - export VLLM_ROCM_USE_AITER=1 - - export VLLM_ROCM_USE_AITER_MHA=1 - - export VLLM_ROCM_USE_AITER_LINEAR=0 - - export VLLM_ROCM_USE_AITER_RMSNORM=0 - - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index c8093330901..1885306d3fd 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -31,7 +31,12 @@ COPY . 
${COMMON_WORKDIR}/vllm-omni RUN cd ${COMMON_WORKDIR}/vllm-omni && python3 -m pip install --no-cache-dir ".[dev]" # Create python symlink -ENV GPU_ARCHS="gfx942;gfx950" +# `GPU_ARCHS` is an environment variable that is used to set the GPU archs for the AITER. +# This is needed to prevent the AITER automatic GPU arch detection from failing on MI325X. +# The AITER version used in this dockerfile has issues with handling +# the GPU archs of MI325X (CI machine) correctly. So we manually set the GPU archs here. +# We reuse AITER_ROCM_ARCH from the base image to avoid duplication. +ENV GPU_ARCHS=${AITER_ROCM_ARCH} RUN ln -sf /usr/bin/python3 /usr/bin/python ENTRYPOINT [] From 5e9c4d3718e728dcac46025dfe4a61a808e3252b Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 17 Dec 2025 14:24:08 +0000 Subject: [PATCH 20/32] remove print Signed-off-by: tjtanaa --- tests/e2e/offline_inference/test_qwen2_5_omni.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/e2e/offline_inference/test_qwen2_5_omni.py b/tests/e2e/offline_inference/test_qwen2_5_omni.py index 2b7dd4403ab..f5f9894b386 100644 --- a/tests/e2e/offline_inference/test_qwen2_5_omni.py +++ b/tests/e2e/offline_inference/test_qwen2_5_omni.py @@ -37,7 +37,6 @@ def test_mixed_modalities_to_audio(omni_runner: type[OmniRunner], test_config: tuple[str, str]) -> None: """Test processing audio, image, and video together, generating audio output.""" model, stage_config_path = test_config - print(f"Running test for model: {model} and stage config: {stage_config_path}") with omni_runner(model, seed=42, stage_configs_path=stage_config_path) as runner: # Prepare multimodal inputs question = "What is recited in the audio? What is in this image? Describe the video briefly." 
From d865d1800cbd95e66b9c16e32be27dcdf414be67 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 17 Dec 2025 14:26:19 +0000 Subject: [PATCH 21/32] simplify more Signed-off-by: tjtanaa --- .../scripts/hardware_ci/run-amd-test.sh | 88 ------------------- 1 file changed, 88 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 64e97011768..a291f1b8c47 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -82,94 +82,6 @@ HF_MOUNT="/root/.cache/huggingface" commands=$@ echo "Commands:$commands" -commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"pytest -v -s basic_correctness/test_basic_correctness.py"} - -if [[ $commands == *"pytest -v -s models/test_registry.py"* ]]; then - commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"} -fi - -commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"pytest -v -s compile/test_basic_correctness.py"} - -if [[ $commands == *"pytest -v -s lora"* ]]; then - commands=${commands//"pytest -v -s lora"/"VLLM_ROCM_CUSTOM_PAGED_ATTN=0 pytest -v -s lora"} -fi - -#ignore certain kernels tests -if [[ $commands == *" kernels/core"* ]]; then - commands="${commands} \ - --ignore=kernels/core/test_fused_quant_layernorm.py \ - --ignore=kernels/core/test_permute_cols.py" -fi - -if [[ $commands == *" kernels/attention"* ]]; then - commands="${commands} \ - --ignore=kernels/attention/test_attention_selector.py \ - --ignore=kernels/attention/test_encoder_decoder_attn.py \ - --ignore=kernels/attention/test_flash_attn.py \ - --ignore=kernels/attention/test_flashinfer.py \ - --ignore=kernels/attention/test_prefix_prefill.py \ - --ignore=kernels/attention/test_cascade_flash_attn.py \ - --ignore=kernels/attention/test_mha_attn.py \ - 
--ignore=kernels/attention/test_lightning_attn.py \ - --ignore=kernels/attention/test_attention.py" -fi - -if [[ $commands == *" kernels/quantization"* ]]; then - commands="${commands} \ - --ignore=kernels/quantization/test_int8_quant.py \ - --ignore=kernels/quantization/test_machete_mm.py \ - --ignore=kernels/quantization/test_block_fp8.py \ - --ignore=kernels/quantization/test_block_int8.py \ - --ignore=kernels/quantization/test_marlin_gemm.py \ - --ignore=kernels/quantization/test_cutlass_scaled_mm.py \ - --ignore=kernels/quantization/test_int8_kernel.py" -fi - -if [[ $commands == *" kernels/mamba"* ]]; then - commands="${commands} \ - --ignore=kernels/mamba/test_mamba_mixer2.py \ - --ignore=kernels/mamba/test_causal_conv1d.py \ - --ignore=kernels/mamba/test_mamba_ssm_ssd.py" -fi - -if [[ $commands == *" kernels/moe"* ]]; then - commands="${commands} \ - --ignore=kernels/moe/test_moe.py \ - --ignore=kernels/moe/test_cutlass_moe.py \ - --ignore=kernels/moe/test_triton_moe_ptpc_fp8.py" -fi - -#ignore certain Entrypoints/openai tests -if [[ $commands == *" entrypoints/openai "* ]]; then - commands=${commands//" entrypoints/openai "/" entrypoints/openai \ - --ignore=entrypoints/openai/test_audio.py \ - --ignore=entrypoints/openai/test_shutdown.py \ - --ignore=entrypoints/openai/test_completion.py \ - --ignore=entrypoints/openai/test_sleep.py \ - --ignore=entrypoints/openai/test_models.py \ - --ignore=entrypoints/openai/test_lora_adapters.py \ - --ignore=entrypoints/openai/test_return_tokens_as_ids.py \ - --ignore=entrypoints/openai/test_root_path.py \ - --ignore=entrypoints/openai/test_tokenization.py \ - --ignore=entrypoints/openai/test_prompt_validation.py "} -fi - -#ignore certain Entrypoints/llm tests -if [[ $commands == *" entrypoints/llm "* ]]; then - commands=${commands//" entrypoints/llm "/" entrypoints/llm \ - --ignore=entrypoints/llm/test_chat.py \ - --ignore=entrypoints/llm/test_accuracy.py \ - --ignore=entrypoints/llm/test_init.py \ - 
--ignore=entrypoints/llm/test_prompt_validation.py "} -fi - -# --ignore=entrypoints/openai/test_encoder_decoder.py \ -# --ignore=entrypoints/openai/test_embedding.py \ -# --ignore=entrypoints/openai/test_oot_registration.py -# --ignore=entrypoints/openai/test_accuracy.py \ -# --ignore=entrypoints/openai/test_models.py <= Fails on MI250 but passes on MI300 as of 2025-03-13 - - PARALLEL_JOB_COUNT=8 MYPYTHONPATH=".." From 5053c1bd08cb92d6fdd09565f94d682ca48bc72c Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 17 Dec 2025 16:15:34 +0000 Subject: [PATCH 22/32] keep the template small Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 8 +- .buildkite/test-template-amd-omni.j2 | 286 +-------------------------- 2 files changed, 4 insertions(+), 290 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index fb84b57c74e..9a47f5e9b04 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -6,8 +6,6 @@ steps: depends_on: amd-build mirror_hardwares: [amdexperimental, amdproduction, amdtentative] grade: Blocking - source_file_dependencies: - - ./ commands: - export GPU_ARCHS=gfx942 - export MIOPEN_FIND_MODE=FAST @@ -22,8 +20,7 @@ steps: agent_pool: mi325_1 depends_on: amd-build mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - source_file_dependencies: - - ./ + grade: Blocking commands: - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG @@ -40,8 +37,7 @@ steps: agent_pool: mi325_2 depends_on: amd-build mirror_hardwares: [amdexperimental, amdproduction, amdtentative] - source_file_dependencies: - - ./ + grade: Blocking commands: - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG diff --git a/.buildkite/test-template-amd-omni.j2 b/.buildkite/test-template-amd-omni.j2 index 47a207580a4..0b6eb8f54b2 100644 --- a/.buildkite/test-template-amd-omni.j2 +++ b/.buildkite/test-template-amd-omni.j2 @@ -1,292 +1,10 @@ {# vllm-omni customized version Based on: 
https://github.com/vllm-project/ci-infra/blob/main/buildkite/test-template-amd.j2 Last synced: 2025-12-15 - Modifications: docker image name, default working dir, build queue, build commands for vllm-omni + Modifications: Removed unused CUDA/NVIDIA logic, keeping only AMD tests #} -{% set cov_enabled = (cov_enabled == "1") %} -{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %} -{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-torch-nightly" %} -{% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cu118" %} -{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cpu" %} -{% if branch == "main" %} -{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT" %} -{% set docker_image_latest = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:latest" %} -{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-torch-nightly" %} -{% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cu118" %} -{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %} -{% endif %} {% set docker_image_amd = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-rocm-omni" %} {% set default_working_dir = "/app/vllm-omni" %} -{% set hf_home = "/root/.cache/huggingface" %} -{% set hf_home_efs = "/mnt/efs/hf_cache" %} -{% set hf_home_fsx = "/fsx/hf_cache" %} -{% set list_file_diff = list_file_diff | split("|") %} - -{# Intelligent test targeting: Detect when only test files changed and collect them #} -{%- set tests_acc = namespace(only_tests=true, any=false, changed=[]) %} -{%- for file in list_file_diff %} -{%- if file[:6] == 'tests/' and '/test_' in file and file[-3:] == '.py' %} -{%- set tests_acc.any = true %} -{%- set tests_acc.changed = tests_acc.changed + [file[6:]] %} -{%- else 
%} -{%- set tests_acc.only_tests = false %} -{%- endif %} -{%- endfor %} -{%- set tests_only = (tests_acc.only_tests and tests_acc.any) %} -{%- set changed_tests = tests_acc.changed %} - -{% macro add_pytest_coverage(cmd, coverage_file) %} -{% if "pytest " in cmd %} -COVERAGE_FILE={{ coverage_file }} {{ cmd | replace("pytest ", "pytest --cov=vllm --cov-report=xml --cov-append --durations=0 ") }} || true -{% else %} -{{ cmd }} -{% endif %} -{% endmacro %} - -{% macro add_docker_pytest_coverage(step, cov_enabled) %} -{# Compute coverage file id #} -{% set step_length = step.label | length %} -{% set step_first = step.label | first | default("x") %} -{% set coverage_file = ".coverage." + step_length ~ "_" ~ step_first %} - -{# Intelligent test targeting: Build matched test targets for this step when only tests changed #} -{%- set match_ns = namespace(targets=[]) %} -{%- if tests_only and step.source_file_dependencies %} -{%- for dep in step.source_file_dependencies %} -{%- if dep[:6] == 'tests/' %} -{%- set dep_rel = dep[6:] %} -{# Handle deps that already end with '/' (e.g., tests/benchmarks/) #} -{%- if dep_rel[-1:] == '/' %} -{%- set dep_dir_prefix = dep_rel %} -{%- set dep_file_name = dep_rel[:-1] ~ '.py' %} -{%- else %} -{%- set dep_dir_prefix = dep_rel ~ '/' %} -{%- set dep_file_name = dep_rel ~ '.py' %} -{%- endif %} -{%- for t in changed_tests %} -{# Check if t starts with dep_dir_prefix (for directories) or equals dep_file_name (for files) #} -{%- set prefix_len = dep_dir_prefix | length %} -{%- set t_prefix = t[:prefix_len] %} -{%- set cond1 = (t | length >= prefix_len and t_prefix == dep_dir_prefix) %} -{%- set cond2 = (t == dep_file_name) %} -{%- if cond1 or cond2 %} -{%- set match_ns.targets = match_ns.targets + [t] %} -{%- endif %} -{%- endfor %} -{%- endif %} -{%- endfor %} -{%- endif %} -{%- set matched_targets = match_ns.targets %} - -{# If we have matched targets, run only those specific tests #} -{% if matched_targets | length > 0 %} -pytest -v -s 
{{ matched_targets | join(' ') }} -{% else %} -{# Default behavior: preserve original commands with optional coverage injection #} -{% if cov_enabled %} -{% set ns = namespace(has_pytest=false) %} -{% if step.command %} -{% if "pytest " in step.command %}{% set ns.has_pytest = true %}{% endif %} -{{ add_pytest_coverage(step.command, coverage_file) }} -{% else %} -{% for cmd in step.commands %} -{% if "pytest " in cmd %}{% set ns.has_pytest = true %}{% endif %} -{{ add_pytest_coverage(cmd, coverage_file) }}{{ " && " if not loop.last else "" }}{% endfor %} -{% endif %}{% if ns.has_pytest %} && curl -sSL https://raw.githubusercontent.com/vllm-project/ci-infra/{{ vllm_ci_branch | default('main') }}/buildkite/scripts/upload_codecov.sh | bash -s -- \"{{ step.label }}\"{% endif %} -{% else %} -{{ step.command or (step.commands | join(' && ')) | safe }} -{% endif %} -{% endif %} -{% endmacro %} - -{% macro render_cuda_config(step, image, default_working_dir, hf_home_fsx, hf_home, branch) %} -agents: - {% if step.label == "Documentation Build" %} - queue: small_cpu_queue_premerge - {% elif step.no_gpu %} - queue: cpu_queue_premerge_us_east_1 - {% elif step.gpu == "a100" %} - queue: a100_queue - {% elif step.gpu == "h100" %} - queue: mithril-h100-pool - {% elif step.gpu == "h200" %} - queue: skylab-h200 - {% elif step.gpu == "b200" %} - queue: B200 - {% elif step.num_gpus == 2 or step.num_gpus == 4 %} - queue: gpu_4_queue - {% else %} - queue: gpu_1_queue - {% endif %} - -{% if step.num_nodes >= 2 %} -commands: - - ./.buildkite/scripts/run-multi-node-test.sh {{ (step.working_dir or default_working_dir) | safe }} {{ step.num_nodes }} {{ step.num_gpus }} {{ image }} {% for command in step.commands %}"{{ (command | join(' && ')) | safe }}" {% endfor %} -{% endif %} - -{% if step.parallelism %} -parallelism: {{ step.parallelism }} -{% endif %} - -retry: - automatic: - - exit_status: -1 - limit: 1 - - exit_status: -10 - limit: 1 - -{% if step.num_nodes < 2 %} -plugins: - {% if 
step.gpu != "a100" and step.gpu != "h100" and step.gpu != "h200" and step.gpu != "b200" %} - - docker#v5.2.0: - image: {{ image }} - always-pull: true - propagate-environment: true - {% if not step.no_gpu %} - gpus: all - {% endif %} - {% if step.label == "Benchmarks" or step.mount_buildkite_agent or cov_enabled %} - mount-buildkite-agent: true - {% endif %} - command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"] - environment: - - VLLM_USAGE_SOURCE=ci-test - - NCCL_CUMEM_HOST_ENABLE=0 - - HF_HOME={{ hf_home_fsx }} - - HF_TOKEN - - CODECOV_TOKEN - {% if fail_fast == "true" %} - - PYTEST_ADDOPTS=-x - {% endif %} - {% if branch == "main" %} - - BUILDKITE_ANALYTICS_TOKEN - {% endif %} - {% if step.label == "Speculative decoding tests" %} - - VLLM_ATTENTION_BACKEND=XFORMERS - {% endif %} - volumes: - - /dev/shm:/dev/shm - - {{ hf_home_fsx }}:{{ hf_home_fsx }} - {% elif step.gpu == "h200" %} - - docker#v5.2.0: - image: {{ image }} - always-pull: true - propagate-environment: true - gpus: all - command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"] - environment: - - VLLM_USAGE_SOURCE=ci-test - - NCCL_CUMEM_HOST_ENABLE=0 - - HF_HOME=/benchmark-hf-cache - - HF_TOKEN - - CODECOV_TOKEN - {% if fail_fast == "true" %} - - PYTEST_ADDOPTS=-x - {% endif %} - {% if branch == "main" %} - - BUILDKITE_ANALYTICS_TOKEN - {% endif %} - volumes: - - /dev/shm:/dev/shm - - /data/benchmark-hf-cache:/benchmark-hf-cache - - /data/benchmark-vllm-cache:/root/.cache/vllm - {% elif step.gpu == "b200" %} - - docker#v5.2.0: - image: {{ image }} - always-pull: true - 
propagate-environment: true - # gpus will be configured by BUILDKITE_PLUGIN_DOCKER_GPUS in per host environment variable. - # gpus: all - command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"] - environment: - - VLLM_USAGE_SOURCE=ci-test - - NCCL_CUMEM_HOST_ENABLE=0 - - HF_HOME=/benchmark-hf-cache - - HF_TOKEN - - CODECOV_TOKEN - {% if fail_fast == "true" %} - - PYTEST_ADDOPTS=-x - {% endif %} - {% if branch == "main" %} - - BUILDKITE_ANALYTICS_TOKEN - {% endif %} - volumes: - - /dev/shm:/dev/shm - - /data/benchmark-hf-cache:/benchmark-hf-cache - - /data/benchmark-vllm-cache:/root/.cache/vllm - {% elif step.gpu == "h100" %} - - kubernetes: - podSpec: - containers: - - image: {{ image }} - command: - - bash -c "{{ '(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd ' ~ ((step.working_dir or default_working_dir) | safe) ~ ' && ' ~ (step.command or (step.commands | join(" && ")) | safe) }}" - resources: - limits: - nvidia.com/gpu: {{ step.num_gpus or 1 }} - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: {{ hf_home }} - env: - - name: VLLM_USAGE_SOURCE - value: ci-test - - name: NCCL_CUMEM_HOST_ENABLE - value: "0" - - name: HF_HOME - value: {{ hf_home }} - nodeSelector: - nvidia.com/gpu.product: NVIDIA-H100-80GB-HBM3 - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: /mnt/hf-cache - type: Directory - {% else %} - - kubernetes: - podSpec: - priorityClassName: ci - containers: - - image: {{ image }} - command: - - bash -c "{{ '(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd ' ~ ((step.working_dir or default_working_dir) | safe) ~ ' && ' ~ (step.command or (step.commands | join(" && ")) | safe) }}" - resources: - 
limits: - nvidia.com/gpu: {{ step.num_gpus or 1 }} - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: {{ hf_home }} - env: - - name: VLLM_USAGE_SOURCE - value: ci-test - - name: NCCL_CUMEM_HOST_ENABLE - value: "0" - - name: HF_HOME - value: {{ hf_home }} - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: {{ hf_home }} - type: Directory - {% endif %} -{% endif %} -{% endmacro %} - group: "AMD Tests" depends_on: ~ @@ -322,7 +40,7 @@ plugins: {% else %} queue: amd_mi325_1 {% endif %} - command: bash .buildkite/scripts/hardware_ci/run-amd-test.sh "(command rocm-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" + command: bash .buildkite/scripts/hardware_ci/run-amd-test.sh "(command rocm-smi || true) && cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" env: DOCKER_BUILDKIT: "1" priority: 100 From c94d67ed487c9ae1f5af5e6143bd4d458e5c4d00 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Thu, 18 Dec 2025 02:03:10 +0000 Subject: [PATCH 23/32] remove unwanted print Signed-off-by: tjtanaa --- tests/e2e/offline_inference/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/e2e/offline_inference/utils.py b/tests/e2e/offline_inference/utils.py index 82c46ff55dd..931e7b506cb 100644 --- a/tests/e2e/offline_inference/utils.py +++ b/tests/e2e/offline_inference/utils.py @@ -20,7 +20,6 @@ VLLM_PATH = Path(__file__).parent.parent.parent """Path to root of the vLLM repository.""" -print(f"VLLM_PATH: {VLLM_PATH}") _P = ParamSpec("_P") From 5c104bb4172a1c1221db7fe25a3b48e8529941b8 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Thu, 18 Dec 2025 02:10:37 +0000 
Subject: [PATCH 24/32] remove qwen3 omni test relate file for now Signed-off-by: tjtanaa --- .../stage_configs/rocm/qwen3_omni_ci.yaml | 98 ------------------- .../e2e/offline_inference/test_qwen3_omni.py | 6 -- 2 files changed, 104 deletions(-) delete mode 100644 tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml diff --git a/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml b/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml deleted file mode 100644 index fb955f72478..00000000000 --- a/tests/e2e/offline_inference/stage_configs/rocm/qwen3_omni_ci.yaml +++ /dev/null @@ -1,98 +0,0 @@ -# Stage config for running Qwen3-Omni-MoE with 3-stage architecture -# Stage 0: Thinker (multimodal understanding + text generation) -# Stage 1: Talker (text embeddings → 16-layer RVQ codec codes) -# Stage 2: Code2Wav (8-layer RVQ codes → audio waveform) - -# The following config has been verified on 2x H100-80G GPUs. -stage_args: - - stage_id: 0 - runtime: - devices: "0,1" - max_batch_size: 1 - engine_args: - model_stage: thinker - model_arch: Qwen3OmniMoeForConditionalGeneration - worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker - scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler - gpu_memory_utilization: 0.4 - enforce_eager: true - trust_remote_code: true - engine_output_type: latent # Output hidden states for talker - distributed_executor_backend: "mp" - enable_prefix_caching: false - hf_config_name: thinker_config - tensor_parallel_size: 2 - load_format: dummy - final_output: true - final_output_type: text - is_comprehension: true - default_sampling_params: - temperature: 0.4 - top_p: 0.9 - top_k: 1 - max_tokens: 100 - seed: 42 - detokenize: True - repetition_penalty: 1.05 - - - stage_id: 1 - runtime: - devices: "1" - max_batch_size: 1 - engine_args: - model_stage: talker - model_arch: Qwen3OmniMoeForConditionalGeneration - worker_cls: vllm_omni.worker.gpu_ar_worker.GPUARWorker - scheduler_cls: 
vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler - gpu_memory_utilization: 0.4 - enforce_eager: true - trust_remote_code: true - engine_output_type: latent # Output codec codes for code2wav - # tensor_parallel_size: 2 - enable_prefix_caching: false - distributed_executor_backend: "mp" - hf_config_name: talker_config - load_format: dummy - engine_input_source: [0] - custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_omni.thinker2talker - # final_output: true - # final_output_type: text - default_sampling_params: - temperature: 0.9 - top_k: 50 - max_tokens: 100 - seed: 42 - detokenize: False - repetition_penalty: 1.05 - stop_token_ids: [2150] - - - stage_id: 2 - runtime: - devices: "0" - max_batch_size: 1 - engine_args: - model_stage: code2wav - model_arch: Qwen3OmniMoeForConditionalGeneration - worker_cls: vllm_omni.worker.gpu_generation_worker.GPUGenerationWorker - scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler - enforce_eager: true - trust_remote_code: true - enable_prefix_caching: false - engine_output_type: audio # Final output: audio waveform - gpu_memory_utilization: 0.1 - distributed_executor_backend: "mp" - max_num_batched_tokens: 1000000 - hf_config_name: thinker_config - load_format: dummy - engine_input_source: [1] - custom_process_input_func: vllm_omni.model_executor.stage_input_processors.qwen3_omni.talker2code2wav - final_output: true - final_output_type: audio - default_sampling_params: - temperature: 0.0 - top_p: 1.0 - top_k: -1 - max_tokens: 200 - seed: 42 - detokenize: True - repetition_penalty: 1.1 diff --git a/tests/e2e/offline_inference/test_qwen3_omni.py b/tests/e2e/offline_inference/test_qwen3_omni.py index 9fa07839f0e..b43fa836174 100644 --- a/tests/e2e/offline_inference/test_qwen3_omni.py +++ b/tests/e2e/offline_inference/test_qwen3_omni.py @@ -10,8 +10,6 @@ import pytest from vllm.assets.video import VideoAsset -from vllm_omni.utils.platform_utils import is_rocm - 
from .conftest import OmniRunner os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" @@ -21,10 +19,6 @@ # CI stage config for 2xH100-80G GPUs stage_configs = [str(Path(__file__).parent / "stage_configs" / "qwen3_omni_ci.yaml")] -if is_rocm(): - # ROCm stage config optimized for MI325 GPU - stage_configs = [str(Path(__file__).parent / "stage_configs" / "rocm" / "qwen3_omni_ci.yaml")] - # Create parameter combinations for model and stage config test_params = [(model, stage_config) for model in models for stage_config in stage_configs] From e6d5b320e7d6c71baf2dfd62eeae71f6e9c4b72d Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Fri, 19 Dec 2025 14:42:56 +0000 Subject: [PATCH 25/32] upgrade vllm version to 0.12.0 following main Signed-off-by: tjtanaa --- docker/Dockerfile.rocm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 1885306d3fd..7fabb9c3c68 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -1,8 +1,8 @@ -ARG BASE_IMAGE=rocm/vllm-dev:nightly_main_20251005 +ARG BASE_IMAGE=rocm/vllm-dev:nightly_main_20251205 FROM ${BASE_IMAGE} ARG COMMON_WORKDIR=/app -ARG VLLM_VERSION=v0.11.0 +ARG VLLM_VERSION=v0.12.0 ARG PYTORCH_ROCM_ARCH="gfx942;gfx950" WORKDIR ${COMMON_WORKDIR} From ef7a50d3f0a3eca1d0eb4fde58860d978f050c88 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Sat, 20 Dec 2025 03:01:33 +0000 Subject: [PATCH 26/32] fix import error ModuleNotFoundError: No module named 'vllm.vllm_flash_attn.layers' Signed-off-by: tjtanaa --- vllm_omni/diffusion/layers/rope.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/vllm_omni/diffusion/layers/rope.py b/vllm_omni/diffusion/layers/rope.py index acc0158fc63..8cb1d10d110 100644 --- a/vllm_omni/diffusion/layers/rope.py +++ b/vllm_omni/diffusion/layers/rope.py @@ -1,7 +1,13 @@ +from importlib.util import find_spec + import torch from einops import rearrange, repeat +from vllm.logger import init_logger 
from vllm_omni.diffusion.layers.custom_op import CustomOp +from vllm_omni.utils.platform_utils import is_rocm + +logger = init_logger(__name__) def rotate_half(x, interleaved=False): @@ -45,6 +51,20 @@ def __init__( super().__init__() self.is_neox_style = is_neox_style self.interleaved = not is_neox_style + self.triton_rotary_emb = None + if is_rocm(): + if find_spec("flash_attn") is not None: + from flash_attn.ops.triton.rotary import apply_rotary + + self.triton_rotary_emb = apply_rotary + else: + logger.warning( + "flash_attn is not installed. Falling back to PyTorch implementation for rotary embeddings." + ) + else: + from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb + + self.triton_rotary_emb = apply_rotary_emb def forward_cuda( self, @@ -52,14 +72,15 @@ def forward_cuda( cos: torch.Tensor, sin: torch.Tensor, ) -> torch.Tensor: - from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb + if self.triton_rotary_emb is None: + return self.forward_native(x, cos, sin) if cos.dim() == 3: # (B, S, D/2) -> (S, D/2) cos = cos[0] sin = sin[0] - return apply_rotary_emb( + return self.triton_rotary_emb( x, cos, sin, From cc225dd32b1572c5a1effae57cce24e6dab7659d Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Sat, 20 Dec 2025 19:46:40 +0000 Subject: [PATCH 27/32] add forward_hip instead of sharing the same path with cuda Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 3 ++- vllm_omni/diffusion/layers/rope.py | 42 ++++++++++++++++++------------ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 9a47f5e9b04..fa108e1534e 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -42,7 +42,8 @@ steps: - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_FIND_MODE=FAST + - export MIOPEN_DEBUG_CONV_DIRECT=0 + - export MIOPEN_DEBUG_CONV_GEMM=0 - export VLLM_ROCM_USE_AITER=1 - export 
VLLM_ROCM_USE_AITER_MHA=1 - export VLLM_ROCM_USE_AITER_LINEAR=0 diff --git a/vllm_omni/diffusion/layers/rope.py b/vllm_omni/diffusion/layers/rope.py index 8cb1d10d110..7db87416193 100644 --- a/vllm_omni/diffusion/layers/rope.py +++ b/vllm_omni/diffusion/layers/rope.py @@ -5,7 +5,6 @@ from vllm.logger import init_logger from vllm_omni.diffusion.layers.custom_op import CustomOp -from vllm_omni.utils.platform_utils import is_rocm logger = init_logger(__name__) @@ -51,20 +50,11 @@ def __init__( super().__init__() self.is_neox_style = is_neox_style self.interleaved = not is_neox_style - self.triton_rotary_emb = None - if is_rocm(): - if find_spec("flash_attn") is not None: - from flash_attn.ops.triton.rotary import apply_rotary + self.apply_rotary_emb_flash_attn = None + if find_spec("flash_attn") is not None: + from flash_attn.ops.triton.rotary import apply_rotary - self.triton_rotary_emb = apply_rotary - else: - logger.warning( - "flash_attn is not installed. Falling back to PyTorch implementation for rotary embeddings." 
- ) - else: - from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb - - self.triton_rotary_emb = apply_rotary_emb + self.apply_rotary_emb_flash_attn = apply_rotary def forward_cuda( self, @@ -72,9 +62,6 @@ def forward_cuda( cos: torch.Tensor, sin: torch.Tensor, ) -> torch.Tensor: - if self.triton_rotary_emb is None: - return self.forward_native(x, cos, sin) - if cos.dim() == 3: # (B, S, D/2) -> (S, D/2) cos = cos[0] @@ -87,6 +74,27 @@ def forward_cuda( interleaved=self.interleaved, ) + def forward_hip( + self, + x: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + ) -> torch.Tensor: + if self.apply_rotary_emb_flash_attn is None: + return self.forward_cuda(x, cos, sin) + + if cos.dim() == 3: + # (B, S, D/2) -> (S, D/2) + cos = cos[0] + sin = sin[0] + + return self.apply_rotary_emb_flash_attn( + x, + cos, + sin, + interleaved=self.interleaved, + ) + def forward_native( self, x: torch.Tensor, From 858f74ec7d9b33d308ce1c40d75a9db0ed6a356e Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Sat, 20 Dec 2025 19:59:49 +0000 Subject: [PATCH 28/32] revert forward_cuda Signed-off-by: tjtanaa --- vllm_omni/diffusion/layers/rope.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm_omni/diffusion/layers/rope.py b/vllm_omni/diffusion/layers/rope.py index 7db87416193..528f2425efb 100644 --- a/vllm_omni/diffusion/layers/rope.py +++ b/vllm_omni/diffusion/layers/rope.py @@ -62,12 +62,14 @@ def forward_cuda( cos: torch.Tensor, sin: torch.Tensor, ) -> torch.Tensor: + from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb + if cos.dim() == 3: # (B, S, D/2) -> (S, D/2) cos = cos[0] sin = sin[0] - return self.triton_rotary_emb( + return apply_rotary_emb( x, cos, sin, From 5695e2f31acadd26a6b9019ec1521408b9872ab3 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Sun, 21 Dec 2025 01:27:05 +0000 Subject: [PATCH 29/32] add forward hip dispatching logic Signed-off-by: tjtanaa --- .buildkite/test-amd.yaml | 6 ++++-- vllm_omni/diffusion/layers/custom_op.py 
| 10 ++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index fa108e1534e..57008e59f53 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -8,7 +8,8 @@ steps: grade: Blocking commands: - export GPU_ARCHS=gfx942 - - export MIOPEN_FIND_MODE=FAST + - export MIOPEN_DEBUG_CONV_DIRECT=0 + - export MIOPEN_DEBUG_CONV_GEMM=0 - export VLLM_ROCM_USE_AITER=1 - export VLLM_ROCM_USE_AITER_MHA=1 - export VLLM_ROCM_USE_AITER_LINEAR=0 @@ -25,7 +26,8 @@ steps: - export GPU_ARCHS=gfx942 - export VLLM_LOGGING_LEVEL=DEBUG - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - export MIOPEN_FIND_MODE=FAST + - export MIOPEN_DEBUG_CONV_DIRECT=0 + - export MIOPEN_DEBUG_CONV_GEMM=0 - export VLLM_ROCM_USE_AITER=1 - export VLLM_ROCM_USE_AITER_MHA=1 - export VLLM_ROCM_USE_AITER_LINEAR=0 diff --git a/vllm_omni/diffusion/layers/custom_op.py b/vllm_omni/diffusion/layers/custom_op.py index 461da0d361e..0bf5c4f60ee 100644 --- a/vllm_omni/diffusion/layers/custom_op.py +++ b/vllm_omni/diffusion/layers/custom_op.py @@ -3,7 +3,7 @@ import torch.nn as nn -from vllm_omni.utils.platform_utils import detect_device_type +from vllm_omni.utils.platform_utils import detect_device_type, is_rocm class CustomOp(nn.Module): @@ -18,7 +18,9 @@ def __init__(self) -> None: self._forward_method = self.dispatch_forward() def dispatch_forward(self) -> Callable: - if self.is_cuda: + if is_rocm(): + return self.forward_hip + elif self.is_cuda: return self.forward_cuda else: return self.forward_native @@ -36,3 +38,7 @@ def forward_native(self, *args, **kwargs): def forward_cuda(self, *args, **kwargs): raise NotImplementedError + + def forward_hip(self, *args, **kwargs): + # By default, we assume that HIP ops are compatible with CUDA ops. 
+ return self.forward_cuda(*args, **kwargs) From 32233aeec5a92c2a85b34a93b72f2ecd093afb5f Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Sun, 21 Dec 2025 02:03:47 +0000 Subject: [PATCH 30/32] try to do torch sync when destructing omni runner in tests Signed-off-by: tjtanaa --- tests/e2e/offline_inference/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/e2e/offline_inference/conftest.py b/tests/e2e/offline_inference/conftest.py index a24c63bff7c..b9a72dd9aeb 100644 --- a/tests/e2e/offline_inference/conftest.py +++ b/tests/e2e/offline_inference/conftest.py @@ -7,6 +7,7 @@ from typing import Any import pytest +import torch from vllm.distributed.parallel_state import cleanup_dist_env_and_memory from vllm.sampling_params import SamplingParams @@ -334,6 +335,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.close() del self.omni cleanup_dist_env_and_memory() + torch.cuda.synchronize() def close(self): """Close and cleanup the Omni instance.""" From dd3e6dbfe7f14a9829068f045706613b29f2fddd Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Sun, 21 Dec 2025 02:36:03 +0000 Subject: [PATCH 31/32] revert the create_new_process_for_each_test for test_qwen25omni Signed-off-by: tjtanaa --- tests/e2e/offline_inference/conftest.py | 2 -- tests/e2e/offline_inference/test_qwen2_5_omni.py | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/e2e/offline_inference/conftest.py b/tests/e2e/offline_inference/conftest.py index b9a72dd9aeb..a24c63bff7c 100644 --- a/tests/e2e/offline_inference/conftest.py +++ b/tests/e2e/offline_inference/conftest.py @@ -7,7 +7,6 @@ from typing import Any import pytest -import torch from vllm.distributed.parallel_state import cleanup_dist_env_and_memory from vllm.sampling_params import SamplingParams @@ -335,7 +334,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.close() del self.omni cleanup_dist_env_and_memory() - torch.cuda.synchronize() def close(self): """Close and cleanup the Omni instance.""" diff 
--git a/tests/e2e/offline_inference/test_qwen2_5_omni.py b/tests/e2e/offline_inference/test_qwen2_5_omni.py index f5f9894b386..63eea1ba26d 100644 --- a/tests/e2e/offline_inference/test_qwen2_5_omni.py +++ b/tests/e2e/offline_inference/test_qwen2_5_omni.py @@ -16,6 +16,7 @@ from vllm_omni.utils import is_npu, is_rocm from .conftest import OmniRunner +from .utils import create_new_process_for_each_test models = ["Qwen/Qwen2.5-Omni-3B"] @@ -34,6 +35,7 @@ @pytest.mark.core_model @pytest.mark.parametrize("test_config", test_params) +@create_new_process_for_each_test() def test_mixed_modalities_to_audio(omni_runner: type[OmniRunner], test_config: tuple[str, str]) -> None: """Test processing audio, image, and video together, generating audio output.""" model, stage_config_path = test_config @@ -90,6 +92,7 @@ def test_mixed_modalities_to_audio(omni_runner: type[OmniRunner], test_config: t @pytest.mark.core_model @pytest.mark.parametrize("test_config", test_params) +@create_new_process_for_each_test() def test_mixed_modalities_to_text_only(omni_runner: type[OmniRunner], test_config: tuple[str, str]) -> None: """Test processing audio, image, and video together, generating audio output.""" model, stage_config_path = test_config From fb1b1d87bd0d90b0e5f5b8807c5c1d466d41a26f Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Sun, 21 Dec 2025 03:10:43 +0000 Subject: [PATCH 32/32] fix create_new_process_for_each_test Signed-off-by: tjtanaa --- tests/e2e/offline_inference/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/e2e/offline_inference/utils.py b/tests/e2e/offline_inference/utils.py index 931e7b506cb..c491c10b91e 100644 --- a/tests/e2e/offline_inference/utils.py +++ b/tests/e2e/offline_inference/utils.py @@ -195,7 +195,11 @@ def create_new_process_for_each_test( A decorator to run test functions in separate processes. 
""" if method is None: - use_spawn = current_platform.is_rocm() or current_platform.is_xpu() + # TODO: Find out why spawn is not working correctly on ROCm + # The test content will not run and tests passed immediately. + # For now, using `fork` for ROCm as it can run with `fork` + # and tests are running correctly. + use_spawn = current_platform.is_xpu() method = "spawn" if use_spawn else "fork" assert method in ["spawn", "fork"], "Method must be either 'spawn' or 'fork'"