Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions components/src/dynamo/common/tests/test_video_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def _mock_iio_v2(self):
iio.get_writer = MagicMock(return_value=writer)
return iio, writer

def test_mp4_selects_libx264_codec(self):
def test_mp4_selects_h264_nvenc_codec(self):
from dynamo.common.utils.video_utils import encode_to_video_bytes

iio = self._mock_iio_v3()
Expand All @@ -56,7 +56,7 @@ def test_mp4_selects_libx264_codec(self):

iio.imwrite.assert_called_once()
_, kwargs = iio.imwrite.call_args
assert kwargs.get("codec") == "libx264"
assert kwargs.get("codec") == "h264_nvenc"
assert kwargs.get("fps") == 8

def test_webm_selects_libvpx_vp9_codec(self):
Expand Down
12 changes: 7 additions & 5 deletions components/src/dynamo/common/utils/video_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,15 @@ def encode_to_mp4(
logger.info(f"Encoding {len(frames)} frames to {output_path} at {fps} fps")

try:
# Use imageio to write MP4
# imageio.v3 API
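# Use imageio to write MP4. We use h264_nvenc (NVIDIA HW encoder) instead
# of libx264 because the in-tree ffmpeg build is LGPL-only and libx264
# is GPL-licensed; see container/templates/wheel_builder.Dockerfile.
# Requires an NVIDIA GPU with NVENC hardware (and the driver's
# libnvidia-encode library) at runtime. CUDA capability alone is not
# enough: some data-center GPUs ship without an NVENC engine.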
if hasattr(iio, "imwrite"):
iio.imwrite(output_path, frames, fps=fps, codec="libx264")
iio.imwrite(output_path, frames, fps=fps, codec="h264_nvenc")
else:
# Fall back to v2 API
writer = iio.get_writer(output_path, fps=fps, codec="libx264") # type: ignore[attr-defined]
writer = iio.get_writer(output_path, fps=fps, codec="h264_nvenc") # type: ignore[attr-defined]
try:
for frame in frames:
writer.append_data(frame)
Expand Down Expand Up @@ -215,7 +217,7 @@ def encode_to_video_bytes(
if output_format == "webm":
kwargs["codec"] = "libvpx-vp9"
elif output_format == "mp4":
kwargs["codec"] = "libx264"
kwargs["codec"] = "h264_nvenc"
else:
raise ValueError(f"No codec specified for response format: {output_format}")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ async def _generate_video(
return video_bytes

async def _frames_to_video(
self, frames: list, fps: int, codec: str = "libx264"
self, frames: list, fps: int, codec: str = "h264_nvenc"
) -> bytes:
"""Convert list of frames to video bytes.

Expand Down
6 changes: 6 additions & 0 deletions container/context.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ dynamo:
enable_media_ffmpeg: "false"
enable_gpu_memory_service: "true"
ffmpeg_version: "8.1"
# ffmpeg build inputs (only consumed when ENABLE_MEDIA_FFMPEG=true).
nv_codec_headers_ref: "n13.0.19.0"
libvpx_ref: "v1.14.1"
sccache_version: "v0.14.0"
efa_version: 1.47.0

vllm:
Expand Down Expand Up @@ -73,6 +77,8 @@ vllm:
enable_kvbm: "true"
enable_modelexpress_p2p: "false"
modelexpress_ref: "76fc5d7f06c37121ee8789a29fac6f9b08c4743a" # v0.3.0
# aws-sdk-cpp tag for the NIXL OBJ / S3 backend (built in wheel_builder).
aws_sdk_cpp_version: "1.11.760"

sglang:
cuda12.9:
Expand Down
9 changes: 8 additions & 1 deletion container/deps/requirements.common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,21 @@
# Core runtime dependencies shared by ALL Dynamo containers.
# See README.md in this directory for version pinning strategy.

# Force a source install of imageio-ffmpeg (pure-Python wrapper). The PyPI wheel
# bundles a prebuilt, GPL-encumbered ffmpeg binary in <site-packages>/imageio_ffmpeg/binaries/
# that has CVE exposure; we point imageio at the in-tree LGPL ffmpeg CLI via
# IMAGEIO_FFMPEG_EXE instead. This directive is honored by pip and uv when this
# file is passed via --requirement, and applies to the whole install.
--no-binary imageio-ffmpeg

aiohttp>=3.9.0,<4.0
fastapi==0.120.1
grpcio-tools<=1.76.0 # May have platform-specific builds; pins grpcio ecosystem version
httpx==0.28.1

# Video generation: encode frames to MP4 (used by TRT-LLM, vLLM-Omni, SGLang diffusion)
imageio>=2.37.0
imageio-ffmpeg>=0.6.0
imageio-ffmpeg>=0.6.0 # binary skipped per --no-binary directive at top of file
# Shared plotting utility used by runtime diagnostics and benchmark tooling.
matplotlib==3.10.7
msgpack==1.1.2
Expand Down
12 changes: 12 additions & 0 deletions container/deps/requirements.sglang.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Third-party Python dependencies for the sglang runtime image. Installed
# with --force-reinstall --no-deps to replace the upstream lmsysorg/sglang
# base image's imageio-ffmpeg wheel (which ships a GPL-encumbered prebuilt
# ffmpeg binary) with a source build that leaves no binary on disk.
# IMAGEIO_FFMPEG_EXE points imageio at the in-tree LGPL ffmpeg CLI.

--no-binary imageio-ffmpeg

imageio-ffmpeg>=0.6.0 # binary skipped per --no-binary directive at top of file
6 changes: 6 additions & 0 deletions container/templates/args.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,15 @@ ARG ETCD_VERSION={{ context.dynamo.etcd_version }}

ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
ARG NV_CODEC_HEADERS_REF={{ context.dynamo.nv_codec_headers_ref }}
ARG LIBVPX_REF={{ context.dynamo.libvpx_ref }}
{% if device == "cuda" -%}
ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
{% endif %}

# SCCACHE configuration
ARG USE_SCCACHE
ARG SCCACHE_VERSION={{ context.dynamo.sccache_version }}
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""

Expand Down Expand Up @@ -108,6 +111,9 @@ ARG DEEPGEMM_REF=""
# ModelExpress for P2P weight transfer (optional)
ARG ENABLE_MODELEXPRESS_P2P={{ context.vllm.enable_modelexpress_p2p }}
ARG MODELEXPRESS_REF={{ context.vllm.modelexpress_ref }}

# aws-sdk-cpp tag for the NIXL OBJ / S3 backend (built in wheel_builder).
ARG AWS_SDK_CPP_VERSION={{ context.vllm.aws_sdk_cpp_version }}
{% endif %}
{%- endif -%}

Expand Down
2 changes: 1 addition & 1 deletion container/templates/dynamo_base.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ RUN apt clean && apt-get update -y && \

# Install sccache into the base image so downstream stages can COPY it
# instead of downloading from GitHub (avoids 502 errors under parallel builds)
ARG SCCACHE_VERSION=v0.14.0
ARG SCCACHE_VERSION
RUN ARCH_ALT=$([ "${TARGETARCH}" = "amd64" ] && echo "x86_64" || echo "aarch64") && \
wget --tries=3 --waitretry=5 \
"https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl.tar.gz" && \
Expand Down
8 changes: 6 additions & 2 deletions container/templates/dynamo_runtime.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,17 @@ COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/

# Always copy FFmpeg so libs are available for Rust checks in CI
# Always copy FFmpeg so libs are available for Rust checks in CI.
# libvpx.so* is included because the in-tree ffmpeg is built with --enable-libvpx,
# so libavcodec.so has a runtime dependency on libvpx.so.9.
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
mkdir -p /usr/local/lib/pkgconfig && \
cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
cp -nL /tmp/usr/local/lib/lib*vpx*.so* /usr/local/lib/ 2>/dev/null || true && \
cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/ && \
ldconfig

{% if target not in ("dev", "local-dev") %}
# Copy built artifacts (not needed for dev/local-dev; users build from source)
Expand Down
30 changes: 25 additions & 5 deletions container/templates/sglang_runtime.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,24 @@ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh

{% if context.sglang.enable_media_ffmpeg == "true" %}
# Copy ffmpeg
# Copy ffmpeg from wheel_builder: versioned shared libs (libav*.so*,
# libsw*.so*) for the Rust media-ffmpeg decoder, plus the LGPL CLI binary
# (built with h264_nvenc + libvpx_vp9 encoders) that imageio targets via
# IMAGEIO_FFMPEG_EXE for video encoding. Ungated by enable_media_ffmpeg
# because the upstream lmsysorg/sglang base image always ships
# imageio-ffmpeg with a GPL-encumbered prebuilt binary that we replace
# unconditionally below; the LGPL CLI must be present so imageio has
# something to target.
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
mkdir -p /usr/local/lib/pkgconfig && \
cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
cp -nL /tmp/usr/local/lib/libav*.so* /tmp/usr/local/lib/libsw*.so* /usr/local/lib/ && \
cp -nL /tmp/usr/local/lib/lib*vpx*.so* /usr/local/lib/ 2>/dev/null || true && \
cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
{% endif %}
cp -nL /tmp/usr/local/bin/ffmpeg /usr/local/bin/ffmpeg && \
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/ && \
ldconfig
ENV IMAGEIO_FFMPEG_EXE=/usr/local/bin/ffmpeg

{% if target not in ("dev", "local-dev") %}
# Runtime target installs only the prebuilt Dynamo wheels. SGLang and its NIXL
Expand Down Expand Up @@ -86,6 +95,17 @@ RUN --mount=type=bind,source=./container/deps/requirements.common.txt,target=/tm
export PIP_CACHE_DIR=/root/.cache/pip && \
pip install --break-system-packages --no-deps $(grep -E '^nvtx==' /tmp/requirements.common.txt)

# Replace the upstream lmsysorg/sglang image's imageio-ffmpeg (which ships a
# GPL-encumbered prebuilt ffmpeg binary in <site-packages>/imageio_ffmpeg/binaries/)
# with a source install that leaves no binary on disk. IMAGEIO_FFMPEG_EXE points
# imageio at the LGPL CLI we copied from wheel_builder above. The --no-binary
# directive lives in the requirements file itself.
RUN --mount=type=bind,source=./container/deps/requirements.sglang.txt,target=/tmp/requirements.sglang.txt \
--mount=type=cache,target=/root/.cache/pip,sharing=locked \
export PIP_CACHE_DIR=/root/.cache/pip && \
pip install --break-system-packages --force-reinstall --no-deps \
--requirement /tmp/requirements.sglang.txt

# Copy tests, deploy and components for CI with correct ownership
COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
Expand Down
14 changes: 14 additions & 0 deletions container/templates/trtllm_runtime.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,20 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
{% endif %}

# Copy the in-tree LGPL ffmpeg from wheel_builder. The TRT-LLM diffusion handler
# always encodes video (video_handler.py:263 → encode_to_video_bytes), so the
# CLI and its libav* / libvpx runtime libs need to be present in this image and
# imageio must be pointed at it via IMAGEIO_FFMPEG_EXE. Ungated by
# enable_media_ffmpeg because TRT-LLM unconditionally needs the encoder.
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
cp -nL /tmp/usr/local/lib/libav*.so* /usr/local/lib/ 2>/dev/null || true && \
cp -nL /tmp/usr/local/lib/libsw*.so* /usr/local/lib/ 2>/dev/null || true && \
cp -nL /tmp/usr/local/lib/lib*vpx*.so* /usr/local/lib/ 2>/dev/null || true && \
cp -nL /tmp/usr/local/bin/ffmpeg /usr/local/bin/ffmpeg && \
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/ && \
ldconfig
ENV IMAGEIO_FFMPEG_EXE=/usr/local/bin/ffmpeg

# Copy TensorRT and libgomp from framework image (arch-dependent path, needs root)
COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt
RUN --mount=type=bind,from=framework,source=/usr/lib,target=/mnt/usr_lib \
Expand Down
7 changes: 7 additions & 0 deletions container/templates/vllm_runtime.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -363,12 +363,19 @@ RUN if [ "${ENABLE_MODELEXPRESS_P2P}" = "true" ]; then \

# Install runtime dependencies (common + vllm-specific + benchmarks).
# Test and dev dependencies are NOT installed here — they go in the test and dev images.
# --reinstall-package imageio-ffmpeg forces a source rebuild of imageio-ffmpeg
# (honoring the `--no-binary imageio-ffmpeg` directive in requirements.common.txt),
# replacing the GPL-encumbered prebuilt ffmpeg binary that the upstream
# vllm/vllm-openai base image ships. vLLM-Omni uses diffusers.export_to_video and
# doesn't invoke imageio-ffmpeg, so no IMAGEIO_FFMPEG_EXE is needed — this is
# purely to clear the GPL binary.
RUN --mount=type=bind,source=./container/deps/requirements.common.txt,target=/tmp/requirements.common.txt \
--mount=type=bind,source=./container/deps/requirements.vllm.txt,target=/tmp/requirements.vllm.txt \
--mount=type=bind,source=./container/deps/requirements.benchmark.txt,target=/tmp/requirements.benchmark.txt \
--mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775,sharing=shared \
export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install \
--reinstall-package imageio-ffmpeg \
--requirement /tmp/requirements.common.txt \
--requirement /tmp/requirements.vllm.txt \
--requirement /tmp/requirements.benchmark.txt
Expand Down
44 changes: 35 additions & 9 deletions container/templates/wheel_builder.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,16 @@ RUN if [ "$USE_SCCACHE" = "true" ]; then \
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}

# Always build FFmpeg so libs are available for Rust checks in CI
# Do not delete the source tarball for legal reasons
# Always build FFmpeg so libs are available for Rust checks in CI.
# We also build the ffmpeg CLI with h264_nvenc + libvpx_vp9 encoders so Python
# code can encode video without the GPL-licensed binary shipped by imageio-ffmpeg.
# Stays LGPL-only: --disable-gpl --disable-nonfree are preserved; H.264 comes from
# NVIDIA's NVENC (proprietary HW encoder, already a runtime dependency of these
# GPU images) and VP9 from libvpx (BSD).
# Do not delete the source tarball for legal reasons.
ARG FFMPEG_VERSION
ARG NV_CODEC_HEADERS_REF
ARG LIBVPX_REF
RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token \
--mount=type=secret,id=aws-role-arn,env=AWS_ROLE_ARN \
export AWS_WEB_IDENTITY_TOKEN_FILE=/run/secrets/aws-token && \
Expand All @@ -263,11 +270,26 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
eval $(/tmp/use-sccache.sh setup-env); \
fi && \
if [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
apt-get update -y && apt-get install -y build-essential pkg-config xz-utils; \
apt-get update -y && apt-get install -y build-essential pkg-config xz-utils git yasm; \
apt-get clean && rm -rf /var/lib/apt/lists/*; \
elif [ "$DEVICE" = "cuda" ]; then \
dnf install -y pkg-config xz; \
dnf install -y pkg-config xz git yasm; \
fi && \
# nv-codec-headers: provides the NVENC/NVDEC API headers ffmpeg compiles against.
# Header-only, no runtime dep here; libcuda/libnvidia-encode are loaded at runtime
# in the consuming container.
cd /tmp && \
git clone --depth 1 --branch ${NV_CODEC_HEADERS_REF} https://github.com/FFmpeg/nv-codec-headers.git && \
make -C nv-codec-headers PREFIX=/usr/local install && \
# libvpx: BSD-licensed VP9 encoder needed for the WebM output path. Built from
# source so we don't need to track distro package names (libvpx-dev on Debian
# vs libvpx-devel via EPEL on RHEL/manylinux).
git clone --depth 1 --branch ${LIBVPX_REF} https://chromium.googlesource.com/webm/libvpx.git && \
cd libvpx && \
./configure --prefix=/usr/local --enable-shared --disable-static --disable-examples --disable-unit-tests --disable-tools --disable-docs && \
make -j$(nproc) && \
make install && \
ldconfig && \
cd /tmp && \
curl --retry 5 --retry-delay 3 -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
Expand All @@ -276,17 +298,21 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
--prefix=/usr/local \
--disable-gpl \
--disable-nonfree \
--disable-programs \
--disable-doc \
--disable-static \
--disable-x86asm \
--disable-network \
--disable-encoders \
--disable-muxers \
--disable-bsfs \
--disable-devices \
--disable-libdrm \
--enable-shared && \
--enable-shared \
--enable-nvenc \
--enable-libvpx \
--disable-encoders \
--enable-encoder=h264_nvenc,libvpx_vp9 \
--disable-muxers \
--enable-muxer=mov,mp4,matroska,webm \
--enable-protocol=file,pipe && \
make -j$(nproc) && \
make install && \
/tmp/use-sccache.sh show-stats "FFMPEG" && \
Expand Down Expand Up @@ -395,7 +421,7 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token

{% if framework == "vllm" and device == "cuda" %}
# Build and install AWS SDK C++ (required for NIXL OBJ backend / S3 support)
ARG AWS_SDK_CPP_VERSION=1.11.760
ARG AWS_SDK_CPP_VERSION
RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token \
--mount=type=secret,id=aws-role-arn,env=AWS_ROLE_ARN \
export AWS_WEB_IDENTITY_TOKEN_FILE=/run/secrets/aws-token && \
Expand Down
6 changes: 4 additions & 2 deletions docs/backends/trtllm/trtllm-diffusion.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ image generation through `--modality image_diffusion` flag.

- **TensorRT-LLM with visual_gen**: The `visual_gen` module is part of TensorRT-LLM (`tensorrt_llm._torch.visual_gen`). Install TensorRT-LLM following the [official instructions](https://github.com/NVIDIA/TensorRT-LLM#installation).
- **dynamo-runtime with multimodal API**: The Dynamo runtime must include `ModelType.Videos` or `ModelType.Images` support. Ensure you're using a compatible version.
- **VIDEO diffusion: imageio with ffmpeg**: Required for encoding generated frames to MP4 video:
- **VIDEO diffusion: imageio with ffmpeg**: Required for encoding generated frames to MP4 video. The Dynamo TRT-LLM runtime container ships an LGPL-only ffmpeg CLI built with the NVIDIA NVENC H.264 encoder (`h264_nvenc`) for MP4 and the `libvpx_vp9` encoder for WebM, and points `imageio` at it via `IMAGEIO_FFMPEG_EXE=/usr/local/bin/ffmpeg`. The GPL-encumbered ffmpeg binary normally bundled inside the `imageio-ffmpeg` PyPI wheel is **not** installed. If you're running outside the container, install the Python wrapper without the bundled binary and point it at your own ffmpeg build, which must provide the `h264_nvenc` encoder for MP4 output:
```bash
pip install imageio[ffmpeg]
pip install --no-binary imageio-ffmpeg "imageio[ffmpeg]"
export IMAGEIO_FFMPEG_EXE=/path/to/your/ffmpeg
```
MP4 output requires an NVIDIA GPU with NVENC support at runtime (NVENC is a hardware encoder, and not every NVIDIA GPU includes one).

## Supported Models

Expand Down
Loading