Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions components/src/dynamo/common/utils/video_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,34 @@ def normalize_video_frames(images: list) -> list:
return list(frames)


def normalize_image_frames(images: list) -> list:
    """Normalize stage_output.images into a flat list of PIL Images.

    Image diffusion pipelines usually return PIL Images, but some (e.g. the
    Cosmos3 native pipeline) return numpy arrays shaped ``[batch, frames, H, W,
    C]`` even for single images. Every leading batch/frame dimension is
    flattened, so a ``[B, F, H, W, C]`` array yields ``B * F`` images in
    row-major order rather than only the first batch entry. PIL inputs pass
    through unchanged.

    Args:
        images: Items that are either PIL Images or array-likes accepted by
            ``np.asarray``. The trailing (up to three) dimensions of an array
            are treated as a single frame.

    Returns:
        A flat list of PIL Images, in input order.
    """
    from PIL import Image

    out: list = []
    for item in images:
        if isinstance(item, Image.Image):
            out.append(item)
            continue
        arr = np.asarray(item)
        if arr.size == 0:
            # No pixel data means no frames to emit; this also keeps
            # arr.max() below from raising on a zero-size array.
            continue
        if arr.dtype != np.uint8:  # frames share a dtype/range; convert once
            # Heuristic: a maximum <= 1.0 is read as a normalized [0, 1]
            # range and rescaled; anything larger is cast as-is.
            arr = (
                (arr.clip(0, 1) * 255).round() if arr.max() <= 1.0 else arr
            ).astype(np.uint8)
        # Flatten ALL leading dims ([B, F, H, W, C] -> [B*F, H, W, C]) instead
        # of indexing arr[0], which dropped every batch after the first. For
        # inputs with fewer than four dims this just adds a length-1 axis.
        frames = arr.reshape((-1, *arr.shape[-3:]))
        out.extend(Image.fromarray(frame) for frame in frames)
    return out


def frames_to_numpy(images: list) -> np.ndarray:
"""Convert a list of PIL Images to a numpy array suitable for video encoding.

Expand Down
15 changes: 15 additions & 0 deletions components/src/dynamo/vllm/omni/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,16 @@ def add_arguments(self, parser) -> None:
default=False,
help="Disable torch.compile and force eager execution for diffusion models.",
)
add_negatable_bool_argument(
g,
flag_name="--cosmos3-guardrails",
env_var="DYN_OMNI_COSMOS3_GUARDRAILS",
default=True,
help=(
"Enable Cosmos3 text/video safety guardrails (loads guardrail models "
"at startup). Use --no-cosmos3-guardrails to disable."
),
)

# TTS parameters
tts_g = parser.add_argument_group(
Expand Down Expand Up @@ -333,6 +343,11 @@ class OmniConfig(DynamoRuntimeConfig):
stage_configs_path: Optional[str] = None
default_video_fps: int = 16

# Cosmos3 safety guardrails. When False, routed into
# od_config.model_config["guardrails"]=False so the diffusion engine skips
# loading the guardrail models (see base_handler._build_omni_kwargs).
cosmos3_guardrails: bool = True

# Nested structs — each group of fields has a clear destination
diffusion: OmniDiffusionKwargs = dataclasses.field(
default_factory=OmniDiffusionKwargs
Expand Down
6 changes: 6 additions & 0 deletions components/src/dynamo/vllm/omni/base_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ def _build_omni_kwargs(self, config) -> Dict[str, Any]:
if config.stage_configs_path:
omni_kwargs["stage_configs_path"] = config.stage_configs_path

# Cosmos3 guardrails toggle -> od_config.model_config["guardrails"].
# Mirrors vllm-omni serve's --cosmos3-no-guardrails; when disabled the
# diffusion engine skips loading the guardrail models entirely.
if not config.cosmos3_guardrails:
omni_kwargs["model_config"] = {"guardrails": False}

for field, value in dataclasses.asdict(config.diffusion).items():
if value is not None:
omni_kwargs[field] = value
Expand Down
7 changes: 5 additions & 2 deletions components/src/dynamo/vllm/omni/output_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
from dynamo.common.storage import upload_to_fs
from dynamo.common.utils.engine_response import normalize_finish_reason
from dynamo.common.utils.output_modalities import RequestType
from dynamo.common.utils.video_utils import normalize_video_frames
from dynamo.common.utils.video_utils import (
normalize_image_frames,
normalize_video_frames,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -241,7 +244,7 @@ async def _prepare_images(
self, images: list, request_id: str, response_format: Optional[str] = None
) -> list:
outlist = []
for img in images:
for img in normalize_image_frames(images):
buf = BytesIO()
img.save(buf, format="PNG")
image_bytes = buf.getvalue()
Expand Down
7 changes: 6 additions & 1 deletion container/context.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,12 @@ vllm:
base_image_tag: 22.04
runtime_image_tag: v0.21.0
flashinf_ref: v0.6.8.post1
vllm_omni_ref: "v0.21.0rc1"
# Cosmos3 support is not yet in a released vllm-omni; install from the
# canonical repo pinned to the PR head commit (vllm-project/vllm-omni#3454,
# == refs/pull/3454/head). When vllm_omni_git_url is set, install_vllm_omni.sh
# installs from git@ref; otherwise it falls back to "vllm-omni==<ref>".
vllm_omni_ref: "e826f626afb47c8c3c39ccf892ed247f442f6bd2"
vllm_omni_git_url: "https://github.com/vllm-project/vllm-omni.git"
nixl_ref: v1.1.0
max_jobs: "10"
enable_media_ffmpeg: "false"
Expand Down
14 changes: 11 additions & 3 deletions container/deps/vllm/install_vllm_omni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ set -euo pipefail
VLLM_OMNI_PROTECTED_PACKAGES_FILE="${VLLM_OMNI_PROTECTED_PACKAGES_FILE:-/tmp/vllm_omni_protected_packages.txt}"

PROTECTED_CONSTRAINTS="$(mktemp /tmp/vllm-openai-protected.XXXXXX.txt)"
VLLM_OMNI_VERSION="${VLLM_OMNI_REF#v}"

# When VLLM_OMNI_GIT_URL is set, install from a git ref (e.g. an unreleased PR
# commit); otherwise fall back to the matching PyPI release.
VLLM_OMNI_GIT_URL="${VLLM_OMNI_GIT_URL:-}"
if [ -n "${VLLM_OMNI_GIT_URL}" ]; then
VLLM_OMNI_SPEC="vllm-omni @ git+${VLLM_OMNI_GIT_URL}@${VLLM_OMNI_REF}"
else
VLLM_OMNI_SPEC="vllm-omni==${VLLM_OMNI_REF#v}"
fi

cleanup() {
rm -rf "${PROTECTED_CONSTRAINTS}"
Expand Down Expand Up @@ -41,11 +49,11 @@ if [ "${VLLM_OMNI_TARGET_DEVICE}" = "cuda" ]; then
uv pip install --system \
--prerelease=allow \
--constraints "${PROTECTED_CONSTRAINTS}" \
"vllm-omni==${VLLM_OMNI_VERSION}"
"${VLLM_OMNI_SPEC}"
else
uv pip install \
--prerelease=allow \
--constraints "${PROTECTED_CONSTRAINTS}" \
"vllm-omni==${VLLM_OMNI_VERSION}"
"${VLLM_OMNI_SPEC}"
fi

1 change: 1 addition & 0 deletions container/templates/args.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ ARG MAX_JOBS={{ context.vllm.max_jobs }}
ARG FLASHINF_REF={{ context.vllm.flashinf_ref }}
{% endif %}
ARG VLLM_OMNI_REF={{ context.vllm.vllm_omni_ref }}
ARG VLLM_OMNI_GIT_URL={{ context.vllm.vllm_omni_git_url }}

{% if device == "cuda" -%}
# If left blank, then we will fallback to vLLM defaults
Expand Down
1 change: 1 addition & 0 deletions container/templates/vllm_runtime.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
ARG VLLM_OMNI_REF
ARG VLLM_OMNI_GIT_URL
ARG NIXL_REF
{% if device == "cuda" %}
ARG CUDA_MAJOR
Expand Down
72 changes: 72 additions & 0 deletions examples/backends/vllm/launch/agg_omni_cosmos3_i2v.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Aggregated Cosmos3 image-to-video generation (1 GPU).
# Same worker as text-to-video (registers the "video" modality); i2v is driven
# by adding "input_reference" to the /v1/videos request. The image loader
# rejects local file paths — pass a data: URI (base64) or an http(s) URL.
# --no-cosmos3-guardrails skips loading the safety guardrail models.

set -e
trap 'echo Cleaning up...; kill 0' EXIT

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
source "$SCRIPT_DIR/../../../common/gpu_utils.sh"
source "$SCRIPT_DIR/../../../common/launch_utils.sh"

MODEL="nvidia/Cosmos3-Nano"

# Parse command line arguments
EXTRA_ARGS=()
while [[ $# -gt 0 ]]; do
case $1 in
--model)
MODEL="$2"
shift 2
;;
*)
EXTRA_ARGS+=("$1")
shift
;;
esac
done

HTTP_PORT="${DYN_HTTP_PORT:-8000}"
GPU_MEM_ARGS=$(build_vllm_gpu_mem_args)
print_launch_banner --no-curl "Launching vLLM-Omni Cosmos3 Image-to-Video (1 GPU)" "$MODEL" "$HTTP_PORT"
print_curl_footer <<CURL
# input_reference must be an http(s) URL or a data: URI (local paths are rejected)
curl -s http://localhost:${HTTP_PORT}/v1/videos \\
-H 'Content-Type: application/json' \\
-d '{
"model": "${MODEL}",
"prompt": "The scene comes alive, gentle camera motion",
"input_reference": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg",
"size": "512x512",
"response_format": "url",
"nvext": {
"num_inference_steps": 20,
"num_frames": 17
}
}' | jq
CURL


python -m dynamo.frontend &
FRONTEND_PID=$!

sleep 2

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Remove fixed readiness sleep and use shared health-check orchestration.

sleep 2 is a fragile startup gate and violates the launch-script convention for readiness handling.

As per coding guidelines, launch scripts should “Avoid readiness sleeps/polls; rely on the shared framework health-check patterns instead.”

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@examples/backends/vllm/launch/agg_omni_cosmos3_i2v.sh` at line 50, Remove the
fragile fixed wait ("sleep 2") from agg_omni_cosmos3_i2v.sh and replace it with
the project’s shared health-check orchestration: remove the "sleep 2" line and
invoke the centralized readiness check (use the launch framework's health-check
helper or wait-for-ready wrapper used by other launch scripts) to block until
the service reports healthy; ensure you call the same health-check entrypoint
used elsewhere in the repo so the script follows the launch-script convention
for readiness handling.


echo "Starting Omni worker..."
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
python -m dynamo.vllm.omni \
--model "$MODEL" \
--output-modalities video \
--no-cosmos3-guardrails \
--media-output-fs-url file:///tmp/dynamo_media \
$GPU_MEM_ARGS \
"${EXTRA_ARGS[@]}" &

# Exit on first worker failure; kill 0 in the EXIT trap tears down the rest
wait_any_exit
63 changes: 63 additions & 0 deletions examples/backends/vllm/launch/agg_omni_cosmos3_image.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Aggregated Cosmos3 text-to-image generation (1 GPU).
# Uses the native vLLM-Omni Cosmos3 pipeline; --no-cosmos3-guardrails skips
# loading the safety guardrail models. A worker serves a single modality, so
# this script registers the "image" modality (see agg_omni_cosmos3_video.sh
# for text-to-video).

set -e
trap 'echo Cleaning up...; kill 0' EXIT

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
source "$SCRIPT_DIR/../../../common/launch_utils.sh"

Comment on lines +15 to +16

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Align this launcher with shared vLLM GPU-memory utilities.

This script skips gpu_utils.sh and does not use build_vllm_gpu_mem_args, so users can’t control VRAM behavior consistently with the other Cosmos3 launchers.

As per coding guidelines, launchers should source gpu_utils.sh and “Use build_vllm_gpu_mem_args() to construct GPU memory CLI flags for vLLM.”

Also applies to: 50-57

🧰 Tools
🪛 Shellcheck (0.11.0)

[info] 15-15: Not following: ./../../../common/launch_utils.sh was not specified as input (see shellcheck -x).

(SC1091)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@examples/backends/vllm/launch/agg_omni_cosmos3_image.sh` around lines 15 -
16, The launcher missing gpu_utils integration should source gpu_utils.sh (via
SCRIPT_DIR/../../../common/gpu_utils.sh) and use build_vllm_gpu_mem_args() when
constructing the vLLM CLI invocation in agg_omni_cosmos3_image.sh; update the
script to source gpu_utils.sh near the other shared utils and insert the output
of build_vllm_gpu_mem_args into the vLLM/vllm-server command-line assembly so
GPU memory flags are consistent with other Cosmos3 launchers.

MODEL="nvidia/Cosmos3-Nano"

# Parse command line arguments
EXTRA_ARGS=()
while [[ $# -gt 0 ]]; do
case $1 in
--model)
MODEL="$2"
shift 2
;;
*)
EXTRA_ARGS+=("$1")
shift
;;
esac
done

HTTP_PORT="${DYN_HTTP_PORT:-8000}"
print_launch_banner --no-curl "Launching vLLM-Omni Cosmos3 Image Generation (1 GPU)" "$MODEL" "$HTTP_PORT"
print_curl_footer <<CURL
curl -s -X POST http://localhost:${HTTP_PORT}/v1/images/generations \\
-H 'Content-Type: application/json' \\
-d '{
"model": "${MODEL}",
"prompt": "A robot standing in a bright laboratory",

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, the examples should use an appropriate JSON caption rather than a dense one.
We currently don't have any upsampling within the container, so our example captions should only be JSON strings.

If we later add JSON upsampling within the container, we can use a normal "dense" prompt as the example, together with an extra parameter such as "upsample_prompt=True".

"size": "512x512",
"num_inference_steps": 20
}' | jq
CURL


python -m dynamo.frontend &
FRONTEND_PID=$!

sleep 2

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Replace fixed startup sleep with framework readiness handling.

Using sleep 2 introduces flaky startup behavior across machines.

As per coding guidelines, launch scripts should “Avoid readiness sleeps/polls; rely on the shared framework health-check patterns instead.”

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@examples/backends/vllm/launch/agg_omni_cosmos3_image.sh` at line 48, Replace
the fixed "sleep 2" with a proper readiness check: remove the "sleep 2" line and
instead call the shared framework health-check/ready helper (e.g., a common
script or function like wait_for_framework_ready or check_framework_health) in a
loop with a timeout and non-zero exit if not ready; ensure the script waits for
the specific service(s) the launch depends on and logs progress/errors so
startup is deterministic and not flaky.


echo "Starting Omni worker..."
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
python -m dynamo.vllm.omni \
--model "$MODEL" \
--output-modalities image \
--no-cosmos3-guardrails \
--media-output-fs-url file:///tmp/dynamo_media \
"${EXTRA_ARGS[@]}" &

# Exit on first worker failure; kill 0 in the EXIT trap tears down the rest
wait_any_exit
70 changes: 70 additions & 0 deletions examples/backends/vllm/launch/agg_omni_cosmos3_video.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Aggregated Cosmos3 text-to-video generation (1 GPU).
# Uses the native vLLM-Omni Cosmos3 pipeline; --no-cosmos3-guardrails skips
# loading the safety guardrail models. A worker serves a single modality, so
# this script registers the "video" modality (see agg_omni_cosmos3_image.sh
# for text-to-image).

set -e
trap 'echo Cleaning up...; kill 0' EXIT

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
source "$SCRIPT_DIR/../../../common/gpu_utils.sh"
source "$SCRIPT_DIR/../../../common/launch_utils.sh"

MODEL="nvidia/Cosmos3-Nano"

# Parse command line arguments
EXTRA_ARGS=()
while [[ $# -gt 0 ]]; do
case $1 in
--model)
MODEL="$2"
shift 2
;;
*)
EXTRA_ARGS+=("$1")
shift
;;
esac
done

HTTP_PORT="${DYN_HTTP_PORT:-8000}"
GPU_MEM_ARGS=$(build_vllm_gpu_mem_args)
print_launch_banner --no-curl "Launching vLLM-Omni Cosmos3 Video Generation (1 GPU)" "$MODEL" "$HTTP_PORT"
print_curl_footer <<CURL
curl -s http://localhost:${HTTP_PORT}/v1/videos \\
-H 'Content-Type: application/json' \\
-d '{
"model": "${MODEL}",
"prompt": "A waterfall in a green forest, gentle mist",
"size": "512x512",
"response_format": "url",
"nvext": {
"num_inference_steps": 20,
"num_frames": 17
}
}' | jq
CURL


python -m dynamo.frontend &
FRONTEND_PID=$!

sleep 2

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Use shared readiness checks instead of a fixed sleep.

sleep 2 is not reliable for service readiness and can fail under slower startup conditions.

As per coding guidelines, launch scripts should “Avoid readiness sleeps/polls; rely on the shared framework health-check patterns instead.”

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@examples/backends/vllm/launch/agg_omni_cosmos3_video.sh` at line 49, Replace
the fixed "sleep 2" with a call to the repository's shared readiness-check
helper (instead of a blind sleep, invoke the common wait-for-ready/health-check
script or function used elsewhere), passing the service endpoint/port or health
URL for the component started in this script and fail the launch if the check
returns non-zero; specifically remove the "sleep 2" line and invoke the shared
readiness checker (e.g., wait_for_service or wait-for-ready) with the correct
args so the script blocks until a successful health response and exits on
timeout/error.


echo "Starting Omni worker..."
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
python -m dynamo.vllm.omni \
--model "$MODEL" \
--output-modalities video \
--no-cosmos3-guardrails \
--media-output-fs-url file:///tmp/dynamo_media \
$GPU_MEM_ARGS \
"${EXTRA_ARGS[@]}" &

# Exit on first worker failure; kill 0 in the EXIT trap tears down the rest
wait_any_exit
Loading