ai-dynamo · saturley-hall · May 31, 2026 · May 29, 2026
@@ -41,7 +41,7 @@ def _mock_iio_v2(self):
         iio.get_writer = MagicMock(return_value=writer)
         return iio, writer
 
-    def test_mp4_selects_libx264_codec(self):
+    def test_mp4_selects_h264_nvenc_codec(self):
         from dynamo.common.utils.video_utils import encode_to_video_bytes
 
         iio = self._mock_iio_v3()
@@ -56,7 +56,7 @@ def test_mp4_selects_libx264_codec(self):
 
             iio.imwrite.assert_called_once()
             _, kwargs = iio.imwrite.call_args
-            assert kwargs.get("codec") == "libx264"
+            assert kwargs.get("codec") == "h264_nvenc"
             assert kwargs.get("fps") == 8
 
     def test_webm_selects_libvpx_vp9_codec(self):

@@ -154,13 +154,15 @@ def encode_to_mp4(
     logger.info(f"Encoding {len(frames)} frames to {output_path} at {fps} fps")
 
     try:
-        # Use imageio to write MP4
-        # imageio.v3 API
+        # Use imageio to write MP4. We use h264_nvenc (NVIDIA HW encoder) instead
+        # of libx264 because the in-tree ffmpeg build is LGPL-only and libx264
+        # is GPL-licensed; see container/templates/wheel_builder.Dockerfile.
+        # Requires a CUDA-capable GPU at runtime.
         if hasattr(iio, "imwrite"):
-            iio.imwrite(output_path, frames, fps=fps, codec="libx264")
+            iio.imwrite(output_path, frames, fps=fps, codec="h264_nvenc")
         else:
             # Fall back to v2 API
-            writer = iio.get_writer(output_path, fps=fps, codec="libx264")  # type: ignore[attr-defined]
+            writer = iio.get_writer(output_path, fps=fps, codec="h264_nvenc")  # type: ignore[attr-defined]
             try:
                 for frame in frames:
                     writer.append_data(frame)
@@ -215,7 +217,7 @@ def encode_to_video_bytes(
         if output_format == "webm":
             kwargs["codec"] = "libvpx-vp9"
         elif output_format == "mp4":
-            kwargs["codec"] = "libx264"
+            kwargs["codec"] = "h264_nvenc"
         else:
             raise ValueError(f"No codec specified for response format: {output_format}")
 

@@ -260,7 +260,7 @@ async def _generate_video(
         return video_bytes
 
     async def _frames_to_video(
-        self, frames: list, fps: int, codec: str = "libx264"
+        self, frames: list, fps: int, codec: str = "h264_nvenc"
     ) -> bytes:
         """Convert list of frames to video bytes.
 

@@ -36,6 +36,10 @@ dynamo:
   enable_media_ffmpeg: "false"
   enable_gpu_memory_service: "true"
   ffmpeg_version: "8.1"
+  # ffmpeg build inputs (only consumed when ENABLE_MEDIA_FFMPEG=true).
+  nv_codec_headers_ref: "n13.0.19.0"
+  libvpx_ref: "v1.14.1"
+  sccache_version: "v0.14.0"
   efa_version: 1.47.0
 
 vllm:
@@ -73,6 +77,8 @@ vllm:
   enable_kvbm: "true"
   enable_modelexpress_p2p: "false"
   modelexpress_ref: "76fc5d7f06c37121ee8789a29fac6f9b08c4743a"  # v0.3.0
+  # aws-sdk-cpp tag for the NIXL OBJ / S3 backend (built in wheel_builder).
+  aws_sdk_cpp_version: "1.11.760"
 
 sglang:
   cuda12.9:

@@ -4,14 +4,21 @@
 # Core runtime dependencies shared by ALL Dynamo containers.
 # See README.md in this directory for version pinning strategy.
 
+# Force a source install of imageio-ffmpeg (pure-Python wrapper). The PyPI wheel
+# bundles a prebuilt, GPL-encumbered ffmpeg binary in <site-packages>/imageio_ffmpeg/binaries/
+# that has CVE exposure; we point imageio at the in-tree LGPL ffmpeg CLI via
+# IMAGEIO_FFMPEG_EXE instead. This directive is honored by pip and uv when this
+# file is passed via --requirement, and applies to the whole install.
+--no-binary imageio-ffmpeg
+
 aiohttp>=3.9.0,<4.0
 fastapi==0.120.1
 grpcio-tools<=1.76.0  # May have platform-specific builds; pins grpcio ecosystem version
 httpx==0.28.1
 
 # Video generation: encode frames to MP4 (used by TRT-LLM, vLLM-Omni, SGLang diffusion)
 imageio>=2.37.0
-imageio-ffmpeg>=0.6.0
+imageio-ffmpeg>=0.6.0  # binary skipped per --no-binary directive at top of file
 # Shared plotting utility used by runtime diagnostics and benchmark tooling.
 matplotlib==3.10.7
 msgpack==1.1.2

@@ -0,0 +1,12 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Third-party Python dependencies for the sglang runtime image. Installed
+# with --force-reinstall --no-deps to replace the upstream lmsysorg/sglang
+# base image's imageio-ffmpeg wheel (which ships a GPL-encumbered prebuilt
+# ffmpeg binary) with a source build that leaves no binary on disk.
+# IMAGEIO_FFMPEG_EXE points imageio at the in-tree LGPL ffmpeg CLI.
+
+--no-binary imageio-ffmpeg
+
+imageio-ffmpeg>=0.6.0  # binary skipped per --no-binary directive at top of file
@@ -55,12 +55,15 @@ ARG ETCD_VERSION={{ context.dynamo.etcd_version }}
 
 ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
 ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
+ARG NV_CODEC_HEADERS_REF={{ context.dynamo.nv_codec_headers_ref }}
+ARG LIBVPX_REF={{ context.dynamo.libvpx_ref }}
 {% if device == "cuda" -%}
 ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
 {% endif %}
 
 # SCCACHE configuration
 ARG USE_SCCACHE
+ARG SCCACHE_VERSION={{ context.dynamo.sccache_version }}
 ARG SCCACHE_BUCKET=""
 ARG SCCACHE_REGION=""
 
@@ -108,6 +111,9 @@ ARG DEEPGEMM_REF=""
 # ModelExpress for P2P weight transfer (optional)
 ARG ENABLE_MODELEXPRESS_P2P={{ context.vllm.enable_modelexpress_p2p }}
 ARG MODELEXPRESS_REF={{ context.vllm.modelexpress_ref }}
+
+# aws-sdk-cpp tag for the NIXL OBJ / S3 backend (built in wheel_builder).
+ARG AWS_SDK_CPP_VERSION={{ context.vllm.aws_sdk_cpp_version }}
 {% endif %}
 {%- endif -%}
 

@@ -22,7 +22,7 @@ RUN apt clean && apt-get update -y && \
 
 # Install sccache into the base image so downstream stages can COPY it
 # instead of downloading from GitHub (avoids 502 errors under parallel builds)
-ARG SCCACHE_VERSION=v0.14.0
+ARG SCCACHE_VERSION
 RUN ARCH_ALT=$([ "${TARGETARCH}" = "amd64" ] && echo "x86_64" || echo "aarch64") && \
     wget --tries=3 --waitretry=5 \
         "https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl.tar.gz" && \

@@ -42,13 +42,17 @@ COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
 COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
 COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
 
-# Always copy FFmpeg so libs are available for Rust checks in CI
+# Always copy FFmpeg so libs are available for Rust checks in CI.
+# libvpx.so* is included because the in-tree ffmpeg is built with --enable-libvpx,
+# so libavcodec.so has a runtime dependency on libvpx.so.9.
 RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
     mkdir -p /usr/local/lib/pkgconfig && \
     cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
     cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
+    { cp -nL /tmp/usr/local/lib/lib*vpx*.so* /usr/local/lib/ 2>/dev/null || true; } && \
     cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
-    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
+    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/ && \
+    ldconfig
 
 {% if target not in ("dev", "local-dev") %}
 # Copy built artifacts (not needed for dev/local-dev; users build from source)

@@ -29,15 +29,24 @@ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
     # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
     && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
 
-{% if context.sglang.enable_media_ffmpeg == "true" %}
-# Copy ffmpeg
+# Copy ffmpeg from wheel_builder: versioned shared libs (libav*.so*,
+# libsw*.so*) for the Rust media-ffmpeg decoder, plus the LGPL CLI binary
+# (built with h264_nvenc + libvpx_vp9 encoders) that imageio targets via
+# IMAGEIO_FFMPEG_EXE for video encoding. Ungated by enable_media_ffmpeg
+# because the upstream lmsysorg/sglang base image always ships
+# imageio-ffmpeg with a GPL-encumbered prebuilt binary that we replace
+# unconditionally below; the LGPL CLI must be present so imageio has
+# something to target.
 RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
     mkdir -p /usr/local/lib/pkgconfig && \
     cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
-    cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
+    cp -nL /tmp/usr/local/lib/libav*.so* /tmp/usr/local/lib/libsw*.so* /usr/local/lib/ && \
+    { cp -nL /tmp/usr/local/lib/lib*vpx*.so* /usr/local/lib/ 2>/dev/null || true; } && \
     cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
-    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
-{% endif %}
+    cp -nL /tmp/usr/local/bin/ffmpeg /usr/local/bin/ffmpeg && \
+    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/ && \
+    ldconfig
+ENV IMAGEIO_FFMPEG_EXE=/usr/local/bin/ffmpeg
 
 {% if target not in ("dev", "local-dev") %}
 # Runtime target installs only the prebuilt Dynamo wheels. SGLang and its NIXL
@@ -86,6 +95,17 @@ RUN --mount=type=bind,source=./container/deps/requirements.common.txt,target=/tm
     export PIP_CACHE_DIR=/root/.cache/pip && \
     pip install --break-system-packages --no-deps $(grep -E '^nvtx==' /tmp/requirements.common.txt)
 
+# Replace the upstream lmsysorg/sglang image's imageio-ffmpeg (which ships a
+# GPL-encumbered prebuilt ffmpeg binary in <site-packages>/imageio_ffmpeg/binaries/)
+# with a source install that leaves no binary on disk. IMAGEIO_FFMPEG_EXE points
+# imageio at the LGPL CLI we copied from wheel_builder above. The --no-binary
+# directive lives in the requirements file itself.
+RUN --mount=type=bind,source=./container/deps/requirements.sglang.txt,target=/tmp/requirements.sglang.txt \
+    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    export PIP_CACHE_DIR=/root/.cache/pip && \
+    pip install --break-system-packages --force-reinstall --no-deps \
+        --requirement /tmp/requirements.sglang.txt
+
 # Copy tests, deploy and components for CI with correct ownership
 COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
 COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples

@@ -164,6 +164,20 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
     cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
 {% endif %}
 
+# Copy the in-tree LGPL ffmpeg from wheel_builder. The TRT-LLM diffusion handler
+# always encodes video (video_handler.py:263 → encode_to_video_bytes), so the
+# CLI and its libav* / libvpx runtime libs need to be present in this image and
+# imageio must be pointed at it via IMAGEIO_FFMPEG_EXE. Ungated by
+# enable_media_ffmpeg because TRT-LLM unconditionally needs the encoder.
+RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
+    cp -nL /tmp/usr/local/lib/libav*.so* /usr/local/lib/ 2>/dev/null || true && \
+    cp -nL /tmp/usr/local/lib/libsw*.so* /usr/local/lib/ 2>/dev/null || true && \
+    cp -nL /tmp/usr/local/lib/lib*vpx*.so* /usr/local/lib/ 2>/dev/null || true && \
+    cp -nL /tmp/usr/local/bin/ffmpeg /usr/local/bin/ffmpeg && \
+    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/ && \
+    ldconfig
+ENV IMAGEIO_FFMPEG_EXE=/usr/local/bin/ffmpeg
+
 # Copy TensorRT and libgomp from framework image (arch-dependent path, needs root)
 COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt
 RUN --mount=type=bind,from=framework,source=/usr/lib,target=/mnt/usr_lib \

@@ -363,12 +363,19 @@ RUN if [ "${ENABLE_MODELEXPRESS_P2P}" = "true" ]; then \
 
 # Install runtime dependencies (common + vllm-specific + benchmarks).
 # Test and dev dependencies are NOT installed here — they go in the test and dev images.
+# --reinstall-package imageio-ffmpeg forces a source rebuild of imageio-ffmpeg
+# (honoring the `--no-binary imageio-ffmpeg` directive in requirements.common.txt),
+# replacing the GPL-encumbered prebuilt ffmpeg binary that the upstream
+# vllm/vllm-openai base image ships. vLLM-Omni uses diffusers.export_to_video and
+# doesn't invoke imageio-ffmpeg, so no IMAGEIO_FFMPEG_EXE is needed — this is
+# purely to clear the GPL binary.
 RUN --mount=type=bind,source=./container/deps/requirements.common.txt,target=/tmp/requirements.common.txt \
     --mount=type=bind,source=./container/deps/requirements.vllm.txt,target=/tmp/requirements.vllm.txt \
     --mount=type=bind,source=./container/deps/requirements.benchmark.txt,target=/tmp/requirements.benchmark.txt \
     --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775,sharing=shared \
     export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
     uv pip install \
+        --reinstall-package imageio-ffmpeg \
         --requirement /tmp/requirements.common.txt \
         --requirement /tmp/requirements.vllm.txt \
         --requirement /tmp/requirements.benchmark.txt

@@ -252,9 +252,16 @@ RUN if [ "$USE_SCCACHE" = "true" ]; then \
 ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
     SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}
 
-# Always build FFmpeg so libs are available for Rust checks in CI
-# Do not delete the source tarball for legal reasons
+# Always build FFmpeg so libs are available for Rust checks in CI.
+# We also build the ffmpeg CLI with h264_nvenc + libvpx_vp9 encoders so Python
+# code can encode video without the GPL-licensed binary shipped by imageio-ffmpeg.
+# Stays LGPL-only: --disable-gpl --disable-nonfree are preserved; H.264 comes from
+# NVIDIA's NVENC (proprietary HW encoder, already a runtime dependency of these
+# GPU images) and VP9 from libvpx (BSD).
+# Do not delete the source tarball for legal reasons.
 ARG FFMPEG_VERSION
+ARG NV_CODEC_HEADERS_REF
+ARG LIBVPX_REF
 RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token \
     --mount=type=secret,id=aws-role-arn,env=AWS_ROLE_ARN \
     export AWS_WEB_IDENTITY_TOKEN_FILE=/run/secrets/aws-token && \
@@ -263,11 +270,26 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
         eval $(/tmp/use-sccache.sh setup-env); \
     fi && \
     if [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
-    apt-get update -y && apt-get install -y build-essential pkg-config xz-utils; \
+    apt-get update -y && apt-get install -y build-essential pkg-config xz-utils git yasm; \
     apt-get clean && rm -rf /var/lib/apt/lists/*; \
     elif [ "$DEVICE" = "cuda" ]; then \
-    dnf install -y pkg-config xz; \
+    dnf install -y pkg-config xz git yasm; \
     fi && \
+    # nv-codec-headers: provides the NVENC/NVDEC API headers ffmpeg compiles against.
+    # Header-only, no runtime dep here; libcuda/libnvidia-encode are loaded at runtime
+    # in the consuming container.
+    cd /tmp && \
+    git clone --depth 1 --branch ${NV_CODEC_HEADERS_REF} https://github.com/FFmpeg/nv-codec-headers.git && \
+    make -C nv-codec-headers PREFIX=/usr/local install && \
+    # libvpx: BSD-licensed VP9 encoder needed for the WebM output path. Built from
+    # source so we don't need to track distro package names (libvpx-dev on Debian
+    # vs libvpx-devel via EPEL on RHEL/manylinux).
+    git clone --depth 1 --branch ${LIBVPX_REF} https://chromium.googlesource.com/webm/libvpx.git && \
+    cd libvpx && \
+    ./configure --prefix=/usr/local --enable-shared --disable-static --disable-examples --disable-unit-tests --disable-tools --disable-docs && \
+    make -j$(nproc) && \
+    make install && \
+    ldconfig && \
     cd /tmp && \
     curl --retry 5 --retry-delay 3 -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
     tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
@@ -276,17 +298,21 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
         --prefix=/usr/local \
         --disable-gpl \
         --disable-nonfree \
-        --disable-programs \
         --disable-doc \
         --disable-static \
         --disable-x86asm \
         --disable-network \
-        --disable-encoders \
-        --disable-muxers \
         --disable-bsfs \
         --disable-devices \
         --disable-libdrm \
-        --enable-shared && \
+        --enable-shared \
+        --enable-nvenc \
+        --enable-libvpx \
+        --disable-encoders \
+        --enable-encoder=h264_nvenc,libvpx_vp9 \
+        --disable-muxers \
+        --enable-muxer=mov,mp4,matroska,webm \
+        --enable-protocol=file,pipe && \
     make -j$(nproc) && \
     make install && \
     /tmp/use-sccache.sh show-stats "FFMPEG" && \
@@ -395,7 +421,7 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
 
 {% if framework == "vllm" and device == "cuda" %}
 # Build and install AWS SDK C++ (required for NIXL OBJ backend / S3 support)
-ARG AWS_SDK_CPP_VERSION=1.11.760
+ARG AWS_SDK_CPP_VERSION
 RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token \
     --mount=type=secret,id=aws-role-arn,env=AWS_ROLE_ARN \
     export AWS_WEB_IDENTITY_TOKEN_FILE=/run/secrets/aws-token && \

diff --git a/docs/backends/trtllm/trtllm-diffusion.md b/docs/backends/trtllm/trtllm-diffusion.md
@@ -15,10 +15,12 @@ image generation through `--modality image_diffusion` flag.
 
 - **TensorRT-LLM with visual_gen**: The `visual_gen` module is part of TensorRT-LLM (`tensorrt_llm._torch.visual_gen`). Install TensorRT-LLM following the [official instructions](https://github.com/NVIDIA/TensorRT-LLM#installation).
 - **dynamo-runtime with multimodal API**: The Dynamo runtime must include `ModelType.Videos` or `ModelType.Images` support. Ensure you're using a compatible version.
-- **VIDEO diffusion: imageio with ffmpeg**: Required for encoding generated frames to MP4 video:
+- **VIDEO diffusion: imageio with ffmpeg**: Required for encoding generated frames to MP4 video. The Dynamo TRT-LLM runtime container ships an LGPL-only ffmpeg CLI built with the NVIDIA NVENC H.264 encoder (`h264_nvenc`) and the libvpx VP9 encoder (`libvpx-vp9`) for WebM, and points `imageio` at it via `IMAGEIO_FFMPEG_EXE=/usr/local/bin/ffmpeg` — the GPL-encumbered ffmpeg binary normally shipped inside the `imageio-ffmpeg` PyPI wheel is **not** installed. If you're running outside the container, install the Python wrapper without the bundled binary and point it at your own ffmpeg:
   ```bash
-  pip install imageio[ffmpeg]
+  pip install --no-binary imageio-ffmpeg "imageio[ffmpeg]"
+  export IMAGEIO_FFMPEG_EXE=/path/to/your/ffmpeg
   ```
+  MP4 output requires an NVIDIA GPU at runtime (NVENC is a hardware encoder).
 
 ## Supported Models