Merged
3 changes: 3 additions & 0 deletions .github/workflows/schedule_nightly_test_a3.yaml
@@ -126,6 +126,9 @@ jobs:
- name: qwen2-5-vl-7b
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen2_5_vl_7b.py
- name: qwen2-5-vl-7b-epd
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen2_5_vl_7b_epd.py
- name: qwen2-5-vl-32b
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen2_5_vl_32b.py
2 changes: 2 additions & 0 deletions .github/workflows/scripts/config.yaml
@@ -126,6 +126,8 @@ e2e-multicard-2-cards:
estimated_time: 1050
- name: tests/e2e/multicard/2-cards/test_single_request_aclgraph.py
estimated_time: 215
- name: tests/e2e/multicard/2-cards/test_disaggregated_encoder.py
estimated_time: 90

e2e-multicard-4-cards:
# TODO: recover skipped tests
206 changes: 206 additions & 0 deletions examples/disaggregated_encoder/disagg_1e1pd_example.sh
@@ -0,0 +1,206 @@
#!/bin/bash
set -euo pipefail

declare -a PIDS=()

###############################################################################
# Configuration -- override via env before running
###############################################################################
MODEL="${MODEL:-Qwen/Qwen2.5-VL-7B-Instruct}"
LOG_PATH="${LOG_PATH:-./logs}"
mkdir -p "$LOG_PATH"

ENCODE_PORT="${ENCODE_PORT:-19534}"
PREFILL_DECODE_PORT="${PREFILL_DECODE_PORT:-19535}"
PROXY_PORT="${PROXY_PORT:-10001}"

CARD_E="${CARD_E:-0}"
CARD_PD="${CARD_PD:-1}"

EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/tmp/ec_cache}"
TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-12000}" # wait_for_server timeout

NUM_PROMPTS="${NUM_PROMPTS:-100}" # number of prompts to send in benchmark
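
# Everything above can be overridden from the environment; for example, a
# hypothetical two-card run on devices 2 and 3 with a shorter benchmark:
#   CARD_E=2 CARD_PD=3 NUM_PROMPTS=20 bash disagg_1e1pd_example.sh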

###############################################################################
# Helpers
###############################################################################
# Path to the vLLM source tree (used below to locate the sample image);
# adjust if your checkout lives elsewhere
VLLM_ROOT="/vllm-workspace/vllm"

START_TIME=$(date +"%Y%m%d_%H%M%S")
ENC_LOG=$LOG_PATH/encoder_${START_TIME}.log
PD_LOG=$LOG_PATH/pd_${START_TIME}.log
PROXY_LOG=$LOG_PATH/proxy_${START_TIME}.log

wait_for_server() {
  # Block until the given port answers on /v1/chat/completions, or time out
  local port=$1
  timeout "$TIMEOUT_SECONDS" bash -c "
    until curl -s localhost:$port/v1/chat/completions > /dev/null; do
      sleep 1
    done" && return 0 || return 1
}

# Cleanup function: stop every tracked process, then the process group
cleanup() {
  echo "Stopping everything…"
  trap - INT TERM USR1 # prevent re-entrancy

  # Kill all tracked PIDs
  for pid in "${PIDS[@]}"; do
    if kill -0 "$pid" 2>/dev/null; then
      echo "Killing process $pid"
      kill "$pid" 2>/dev/null
    fi
  done

  # Wait a moment for graceful shutdown
  sleep 2

  # Force kill any remaining processes
  for pid in "${PIDS[@]}"; do
    if kill -0 "$pid" 2>/dev/null; then
      echo "Force killing process $pid"
      kill -9 "$pid" 2>/dev/null
    fi
  done

  # Kill the entire process group as a backup
  kill -- -$$ 2>/dev/null

  echo "All processes stopped."
  exit 0
}

trap cleanup INT TERM USR1

# Clear the ec cache folder left over from any previous run
echo "remove previous ec cache folder"
rm -rf "$EC_SHARED_STORAGE_PATH"

echo "make ec cache folder"
mkdir -p "$EC_SHARED_STORAGE_PATH"

###############################################################################
# Encoder worker
###############################################################################
# Encoder-only worker: it never decodes, so it is given a minimal KV-cache
# budget (--gpu-memory-utilization 0.01) and prefix caching is disabled.
ASCEND_RT_VISIBLE_DEVICES="$CARD_E" vllm serve "$MODEL" \
  --gpu-memory-utilization 0.01 \
  --port "$ENCODE_PORT" \
  --enforce-eager \
  --enable-request-id-headers \
  --no-enable-prefix-caching \
  --max-num-batched-tokens 114688 \
  --max-num-seqs 128 \
  --ec-transfer-config '{
    "ec_connector": "ECExampleConnector",
    "ec_role": "ec_producer",
    "ec_connector_extra_config": {
      "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
    }
  }' \
  >"${ENC_LOG}" 2>&1 &

PIDS+=($!)
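
# The "ec_producer" above writes encoder outputs into EC_SHARED_STORAGE_PATH
# and the "ec_consumer" below reads them back; that shared directory is the
# entire handoff in this file-based example connector. One way to watch the
# handoff happen while requests are in flight (purely illustrative):
#   watch -n1 "ls -lh $EC_SHARED_STORAGE_PATH"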

###############################################################################
# Prefill+Decode worker
###############################################################################
ASCEND_RT_VISIBLE_DEVICES="$CARD_PD" vllm serve "$MODEL" \
  --gpu-memory-utilization 0.9 \
  --port "$PREFILL_DECODE_PORT" \
  --enforce-eager \
  --enable-request-id-headers \
  --max-num-seqs 128 \
  --ec-transfer-config '{
    "ec_connector": "ECExampleConnector",
    "ec_role": "ec_consumer",
    "ec_connector_extra_config": {
      "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
    }
  }' \
  >"${PD_LOG}" 2>&1 &

PIDS+=($!)

# Wait for workers
wait_for_server "$ENCODE_PORT"
wait_for_server "$PREFILL_DECODE_PORT"
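# Note: if either worker is not reachable within TIMEOUT_SECONDS,
# wait_for_server returns non-zero and `set -euo pipefail` aborts the whole
# script here, so a hung worker cannot stall the run forever.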

###############################################################################
# Proxy
###############################################################################
python ./disagg_epd_proxy.py \
  --host "0.0.0.0" \
  --port "$PROXY_PORT" \
  --encode-servers-urls "http://localhost:$ENCODE_PORT" \
  --prefill-servers-urls "disable" \
  --decode-servers-urls "http://localhost:$PREFILL_DECODE_PORT" \
  >"${PROXY_LOG}" 2>&1 &

PIDS+=($!)
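
# In this 1E1PD topology the proxy routes the multimodal (encode) part of each
# request to the encoder server and the rest to the combined prefill+decode
# server; passing "disable" for --prefill-servers-urls tells it there is no
# separate prefill stage. (Interpretation based on the flags above.)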

wait_for_server "$PROXY_PORT"
echo "All services are up!"

###############################################################################
# Single request with local image
###############################################################################
echo "Running single request with local image (non-stream)..."
echo "Running single request with local image (non-stream)..."

base64_image=$(base64 -w 0 "${VLLM_ROOT}/tests/v1/ec_connector/integration/hato.jpg")

cat > /tmp/request.json << EOF
{
  "model": "${MODEL}",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": [
        {
          "type": "image_url",
          "image_url": {
            "url": "data:image/jpeg;base64,${base64_image}"
          }
        },
        {
          "type": "text",
          "text": "What is in this image?"
        }
      ]
    }
  ]
}
EOF

curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d @/tmp/request.json

rm -f /tmp/request.json
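
# A streaming variant of the same request is a one-field change: add
# "stream": true at the top level of the JSON payload and use curl -N to
# disable output buffering (messages elided here for brevity):
#   curl -N http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "'"$MODEL"'", "stream": true, "messages": [...]}'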

###############################################################################
# Benchmark
###############################################################################
echo "Running benchmark (stream)..."
vllm bench serve \
  --model "$MODEL" \
  --backend openai-chat \
  --endpoint /v1/chat/completions \
  --dataset-name random-mm \
  --seed 0 \
  --num-prompts "$NUM_PROMPTS" \
  --port "$PROXY_PORT"


# cleanup
echo "cleanup..."
cleanup