Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions examples/online_serving/bagel/run_server_stage_cli.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Launcher for Bagel multi-stage online serving.
# Stage 0 runs as the master and exposes the API server; stage 1 runs headless.

# Each of these settings keeps a non-empty value already present in the
# environment and otherwise falls back to the default shown here.
: "${MODEL:=ByteDance-Seed/BAGEL-7B-MoT}"
: "${PORT:=8091}"
: "${MASTER_ADDRESS:=127.0.0.1}"
: "${MASTER_PORT:=8092}"

# The stage config is located relative to this script's own directory.
script_dir="$(dirname "$0")"
STAGE_CONFIGS_PATH="${script_dir}/../../../vllm_omni/model_executor/stage_configs/bagel.yaml"

# Print the effective configuration before launching anything.
printf '%s\n' \
"Starting Bagel multi-stage server..." \
"Model: $MODEL" \
"API Port: $PORT" \
"Master Address: $MASTER_ADDRESS" \
"Master Port: $MASTER_PORT" \
"Stage Configs: $STAGE_CONFIGS_PATH"

# Start stage 1 (DiT) in headless mode first
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we initialize stage 1 first?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, either stage can be initialized first.

echo "Starting Stage 1 (DiT) in headless mode..."
# Stage 1 is launched in the background so the script can go on to start
# stage 0 in the foreground.
vllm serve "$MODEL" --omni \
--stage-configs-path "$STAGE_CONFIGS_PATH" \
--stage-id 1 \
--headless \
-oma "$MASTER_ADDRESS" \
-omp "$MASTER_PORT" &
STAGE1_PID=$!

# Without this, the background stage 1 process would outlive the script
# whenever stage 0 fails to start or exits. Errors from kill are discarded
# because stage 1 may already have terminated on its own; the script's exit
# status is not affected by the trap.
trap 'kill "$STAGE1_PID" 2>/dev/null' EXIT
Comment on lines +24 to +25
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's best to use the full name here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's OK; one can search the code for the full name. The short name is easier to type.

Comment thread
wuhang2014 marked this conversation as resolved.

# Launch stage 0 (Thinker) in the foreground; it acts as the master and is the
# only stage given an API port.
echo "Starting Stage 0 (Thinker) as master..."
stage0_args=(
--omni
--port "$PORT"
--stage-configs-path "$STAGE_CONFIGS_PATH"
--stage-id 0
-oma "$MASTER_ADDRESS"
-omp "$MASTER_PORT"
)
vllm serve "$MODEL" "${stage0_args[@]}"
1 change: 1 addition & 0 deletions requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ imageio[ffmpeg]>=2.37.2
sox>=1.5.0
prettytable>=3.8.0
aenum==3.1.16
pyzmq>=25.0.0
22 changes: 12 additions & 10 deletions tests/entrypoints/test_async_omni_diffusion_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,23 @@

import pytest

from vllm_omni.entrypoints import omni as omni_module
from vllm_omni.entrypoints import utils as utils_module
from vllm_omni.entrypoints.async_omni import AsyncOmni

pytestmark = [pytest.mark.core_model, pytest.mark.cpu]

MODEL = "riverclouds/qwen_image_random"


def test_default_stage_config_includes_cache_backend(monkeypatch):
"""Ensure cache_backend/cache_config are preserved in default diffusion stage."""
monkeypatch.setattr(omni_module, "load_stage_configs_from_model", lambda model, base_engine_args=None: [])
monkeypatch.setattr(omni_module, "resolve_model_config_path", lambda model: None)
monkeypatch.setattr(utils_module, "load_stage_configs_from_model", lambda model, base_engine_args=None: [])
monkeypatch.setattr(utils_module, "resolve_model_config_path", lambda model: None)
monkeypatch.setattr(AsyncOmni, "_start_stages", lambda self, model: None)
monkeypatch.setattr(AsyncOmni, "_wait_for_stages_ready", lambda self, timeout=0: None)

omni = AsyncOmni(
model="dummy-model",
model=MODEL,
cache_backend="cache_dit",
cache_config='{"Fn_compute_blocks": 2}',
vae_use_slicing=True,
Expand All @@ -41,13 +43,13 @@ def test_default_stage_config_includes_cache_backend(monkeypatch):

def test_default_cache_config_used_when_missing(monkeypatch):
"""Ensure default cache_config is applied when cache_backend is set."""
monkeypatch.setattr(omni_module, "load_stage_configs_from_model", lambda model, base_engine_args=None: [])
monkeypatch.setattr(omni_module, "resolve_model_config_path", lambda model: None)
monkeypatch.setattr(utils_module, "load_stage_configs_from_model", lambda model, base_engine_args=None: [])
monkeypatch.setattr(utils_module, "resolve_model_config_path", lambda model: None)
monkeypatch.setattr(AsyncOmni, "_start_stages", lambda self, model: None)
monkeypatch.setattr(AsyncOmni, "_wait_for_stages_ready", lambda self, timeout=0: None)

omni = AsyncOmni(
model="dummy-model",
model=MODEL,
cache_backend="cache_dit",
)

Expand All @@ -59,13 +61,13 @@ def test_default_cache_config_used_when_missing(monkeypatch):

def test_default_stage_devices_from_sequence_parallel(monkeypatch):
"""Ensure devices list reflects sequence parallel size when no parallel_config is provided."""
monkeypatch.setattr(omni_module, "load_stage_configs_from_model", lambda model, base_engine_args=None: [])
monkeypatch.setattr(omni_module, "resolve_model_config_path", lambda model: None)
monkeypatch.setattr(utils_module, "load_stage_configs_from_model", lambda model, base_engine_args=None: [])
monkeypatch.setattr(utils_module, "resolve_model_config_path", lambda model: None)
monkeypatch.setattr(AsyncOmni, "_start_stages", lambda self, model: None)
monkeypatch.setattr(AsyncOmni, "_wait_for_stages_ready", lambda self, timeout=0: None)

omni = AsyncOmni(
model="dummy-model",
model=MODEL,
ulysses_degree=2,
ring_degree=2,
)
Expand Down
Loading