diff --git a/components/src/dynamo/common/tests/test_video_utils.py b/components/src/dynamo/common/tests/test_video_utils.py
index fddb373412af..4285b50c7b64 100644
--- a/components/src/dynamo/common/tests/test_video_utils.py
+++ b/components/src/dynamo/common/tests/test_video_utils.py
@@ -26,131 +26,144 @@ def make_frames(n=3, h=8, w=8) -> np.ndarray:
 
 
 class TestEncodeToVideoBytes:
-    """Tests for encode_to_video_bytes()."""
-
-    def _mock_iio_v3(self):
-        """Return a mock that looks like imageio.v3 (has imwrite)."""
-        iio = MagicMock()
-        iio.imwrite = MagicMock()
-        return iio
-
-    def _mock_iio_v2(self):
-        """Return a mock that looks like imageio v2 (no imwrite, has get_writer)."""
-        iio = MagicMock(spec=[])  # no attributes by default
-        writer = MagicMock()
-        iio.get_writer = MagicMock(return_value=writer)
-        return iio, writer
-
-    def test_mp4_selects_h264_nvenc_codec(self):
+    """Tests for encode_to_video_bytes().
+
+    encode_to_video_bytes pre-converts RGB->YUV420p in numpy and shells out to
+    ffmpeg (feeding planar YUV on stdin) to sidestep the in-tree LGPL ffmpeg's
+    broken libswscale RGB->YUV path. These tests mock subprocess.run + the temp
+    file so no real ffmpeg is invoked.
+    """
+
+    def _patch_ffmpeg(self, read_bytes=b"video-bytes"):
+        """Build un-started patchers for subprocess.run and NamedTemporaryFile.
+
+        The fake temp file's read() yields ``read_bytes``. Entering the first
+        returned patcher gives the MagicMock standing in for subprocess.run.
+        Returns (run_patch, tempfile_patch).
+        """
+        fake_file = MagicMock()
+        fake_file.read.return_value = read_bytes
+        fake_ctx = MagicMock()
+        fake_ctx.__enter__.return_value = fake_file
+        fake_ntf = MagicMock(return_value=fake_ctx)
+        tempfile_patch = patch("tempfile.NamedTemporaryFile", fake_ntf)
+        run_patch = patch("subprocess.run", MagicMock())
+        return run_patch, tempfile_patch
+
+    def test_mp4_uses_h264_nvenc(self):
         from dynamo.common.utils.video_utils import encode_to_video_bytes
 
-        iio = self._mock_iio_v3()
-        with patch("dynamo.common.utils.video_utils.io") as mock_io, patch(
-            "imageio.v3", iio, create=True
-        ), patch.dict("sys.modules", {"imageio.v3": iio}):
-            buf = MagicMock()
-            buf.getvalue.return_value = b"fake-mp4"
-            mock_io.BytesIO.return_value = buf
-
+        run_patch, tempfile_patch = self._patch_ffmpeg()
+        with run_patch as mock_run, tempfile_patch:
             encode_to_video_bytes(make_frames(), fps=8, output_format="mp4")
 
-            iio.imwrite.assert_called_once()
-            _, kwargs = iio.imwrite.call_args
-            assert kwargs.get("codec") == "h264_nvenc"
-            assert kwargs.get("fps") == 8
+            cmd = mock_run.call_args[0][0]
+            assert "h264_nvenc" in cmd
+            assert mock_run.call_args[1]["check"] is True
 
-    def test_webm_selects_libvpx_vp9_codec(self):
+    def test_webm_uses_libvpx_vp9(self):
         from dynamo.common.utils.video_utils import encode_to_video_bytes
 
-        iio = self._mock_iio_v3()
-        with patch("dynamo.common.utils.video_utils.io") as mock_io, patch(
-            "imageio.v3", iio, create=True
-        ), patch.dict("sys.modules", {"imageio.v3": iio}):
-            buf = MagicMock()
-            buf.getvalue.return_value = b"fake-webm"
-            mock_io.BytesIO.return_value = buf
-
+        run_patch, tempfile_patch = self._patch_ffmpeg()
+        with run_patch as mock_run, tempfile_patch:
             encode_to_video_bytes(make_frames(), fps=16, output_format="webm")
 
-            iio.imwrite.assert_called_once()
-            _, kwargs = iio.imwrite.call_args
-            assert kwargs.get("codec") == "libvpx-vp9"
+            assert "libvpx-vp9" in mock_run.call_args[0][0]
 
-    def test_mp4_passes_extension_to_imwrite(self):
+    def test_unsupported_format_raises_value_error(self):
         from dynamo.common.utils.video_utils import encode_to_video_bytes
 
-        iio = self._mock_iio_v3()
-        with patch("dynamo.common.utils.video_utils.io") as mock_io, patch(
-            "imageio.v3", iio, create=True
-        ), patch.dict("sys.modules", {"imageio.v3": iio}):
-            buf = MagicMock()
-            buf.getvalue.return_value = b"bytes"
-            mock_io.BytesIO.return_value = buf
+        with pytest.raises(ValueError, match="No codec"):
+            encode_to_video_bytes(make_frames(), output_format="avi")
 
-            encode_to_video_bytes(make_frames(), output_format="mp4")
+    def test_bad_shape_raises_value_error(self):
+        from dynamo.common.utils.video_utils import encode_to_video_bytes
 
-            _, kwargs = iio.imwrite.call_args
-            assert kwargs.get("extension") == ".mp4"
+        with pytest.raises(ValueError, match="Expected frames of shape"):
+            encode_to_video_bytes(
+                np.zeros((3, 8, 8), dtype=np.uint8), output_format="mp4"
+            )
+
+    def test_subprocess_failure_raises_runtime_error(self):
+        import subprocess
 
-    def test_webm_passes_extension_to_imwrite(self):
         from dynamo.common.utils.video_utils import encode_to_video_bytes
 
-        iio = self._mock_iio_v3()
-        with patch("dynamo.common.utils.video_utils.io") as mock_io, patch(
-            "imageio.v3", iio, create=True
-        ), patch.dict("sys.modules", {"imageio.v3": iio}):
-            buf = MagicMock()
-            buf.getvalue.return_value = b"bytes"
-            mock_io.BytesIO.return_value = buf
+        err = subprocess.CalledProcessError(1, "ffmpeg", stderr=b"boom")
+        _, tempfile_patch = self._patch_ffmpeg()
+        with patch("subprocess.run", MagicMock(side_effect=err)), tempfile_patch:
+            with pytest.raises(RuntimeError, match="Video encoding to bytes failed"):
+                encode_to_video_bytes(make_frames(), output_format="mp4")
 
-            encode_to_video_bytes(make_frames(), output_format="webm")
+    def test_returns_file_bytes(self):
+        from dynamo.common.utils.video_utils import encode_to_video_bytes
 
-            _, kwargs = iio.imwrite.call_args
-            assert kwargs.get("extension") == ".webm"
+        run_patch, tempfile_patch = self._patch_ffmpeg(read_bytes=b"\x00\x01\x02")
+        with run_patch, tempfile_patch:
+            result = encode_to_video_bytes(make_frames(), output_format="mp4")
 
-    def test_unsupported_format_raises_value_error(self):
-        from dynamo.common.utils.video_utils import encode_to_video_bytes
+        assert result == b"\x00\x01\x02"
 
-        iio = self._mock_iio_v3()
-        with patch("dynamo.common.utils.video_utils.io") as mock_io, patch(
-            "imageio.v3", iio, create=True
-        ), patch.dict("sys.modules", {"imageio.v3": iio}):
-            mock_io.BytesIO.return_value = MagicMock()
 
-            # ValueError is wrapped into RuntimeError by the except block
-            with pytest.raises(RuntimeError, match="Video encoding to bytes failed"):
-                encode_to_video_bytes(make_frames(), output_format="avi")
+# ---------------------------------------------------------------------------
+# normalize_image_frames
+# ---------------------------------------------------------------------------
 
-    def test_returns_bytes_from_buffer(self):
-        from dynamo.common.utils.video_utils import encode_to_video_bytes
 
-        expected = b"\x00\x01\x02"
-        iio = self._mock_iio_v3()
-        with patch("dynamo.common.utils.video_utils.io") as mock_io, patch(
-            "imageio.v3", iio, create=True
-        ), patch.dict("sys.modules", {"imageio.v3": iio}):
-            buf = MagicMock()
-            buf.getvalue.return_value = expected
-            mock_io.BytesIO.return_value = buf
+class TestNormalizeImageFrames:
+    """Tests for normalize_image_frames() — flattens DiffusionFormatter image
+    inputs to PIL. Image pipelines usually emit PIL Images; the Cosmos3 native
+    pipeline emits 5D numpy ``[B, F, H, W, C]``."""
 
-            result = encode_to_video_bytes(make_frames(), output_format="mp4")
+    def test_pil_inputs_returned_by_identity(self):
+        """PIL inputs must pass through without conversion or copy."""
+        from PIL import Image
 
-        assert result == expected
+        from dynamo.common.utils.video_utils import normalize_image_frames
 
-    def test_v2_api_fallback_writes_all_frames(self):
-        """When imageio.v3.imwrite is absent, falls back to get_writer loop."""
-        from dynamo.common.utils.video_utils import encode_to_video_bytes
+        a = Image.new("RGB", (4, 4), (255, 0, 0))
+        b = Image.new("RGB", (4, 4), (0, 255, 0))
+        out = normalize_image_frames([a, b])
+
+        assert len(out) == 2
+        assert out[0] is a and out[1] is b
+
+    def test_uint8_hwc_numpy_preserves_pixels(self):
+        from PIL import Image
+
+        from dynamo.common.utils.video_utils import normalize_image_frames
+
+        arr = np.full((4, 4, 3), 7, dtype=np.uint8)
+        out = normalize_image_frames([arr])
+
+        assert len(out) == 1
+        assert isinstance(out[0], Image.Image)
+        assert out[0].size == (4, 4)  # PIL is (W, H)
+        assert np.asarray(out[0])[0, 0].tolist() == [7, 7, 7]
+
+    def test_cosmos3_5d_strips_batch_and_preserves_frame_order(self):
+        """[B, F, H, W, C] collapses to F PIL frames in order. Distinct
+        per-frame content guards against wrong-axis indexing regressions."""
+        from dynamo.common.utils.video_utils import normalize_image_frames
+
+        arr = np.zeros((1, 3, 4, 4, 3), dtype=np.uint8)
+        arr[0, 0] = 10  # frame 0 fill
+        arr[0, 1] = 20  # frame 1 fill
+        arr[0, 2] = 30  # frame 2 fill
+
+        out = normalize_image_frames([arr])
+
+        assert len(out) == 3
+        assert np.asarray(out[0])[0, 0, 0] == 10
+        assert np.asarray(out[1])[0, 0, 0] == 20
+        assert np.asarray(out[2])[0, 0, 0] == 30
 
-        iio_v2, writer = self._mock_iio_v2()
-        with patch("dynamo.common.utils.video_utils.io") as mock_io, patch(
-            "imageio.v3", iio_v2, create=True
-        ), patch.dict("sys.modules", {"imageio.v3": iio_v2}):
-            buf = MagicMock()
-            buf.getvalue.return_value = b"v2-bytes"
-            mock_io.BytesIO.return_value = buf
+    def test_float_zero_to_one_scaled_to_uint8(self):
+        """float32 [0, 1] inputs must be rescaled to uint8 [0, 255]."""
+        from dynamo.common.utils.video_utils import normalize_image_frames
 
-            frames = make_frames(n=4)
-            encode_to_video_bytes(frames, output_format="mp4")
+        arr = np.full((4, 4, 3), 0.5, dtype=np.float32)
+        out = normalize_image_frames([arr])
 
-            assert writer.append_data.call_count == 4
-            writer.close.assert_called_once()
+        # 0.5 * 255 = 127.5; numpy's banker's rounding yields exactly 128.
+        assert np.asarray(out[0])[0, 0, 0] == 128
diff --git a/components/src/dynamo/common/utils/video_utils.py b/components/src/dynamo/common/utils/video_utils.py
index 347df37e4bcd..ebeeac7d9baa 100644
--- a/components/src/dynamo/common/utils/video_utils.py
+++ b/components/src/dynamo/common/utils/video_utils.py
@@ -7,7 +7,6 @@
 video frames to MP4 format.
 """
 
-import io
 import logging
 import os
 from typing import Tuple
@@ -82,6 +81,34 @@ def normalize_video_frames(images: list) -> list:
     return list(frames)
 
 
+def normalize_image_frames(images: list) -> list:
+    """Normalize stage_output.images into a flat list of PIL Images.
+
+    PIL inputs pass through unchanged. Anything else goes through np.asarray;
+    arrays with more than 4 dims (e.g. Cosmos3's ``[batch, frames, H, W, C]``)
+    keep only index 0 of each extra leading dim. Non-uint8 data whose max is
+    <= 1 is scaled from [0, 1] to [0, 255]; all other non-uint8 data is rounded
+    and clipped to [0, 255] so the uint8 cast cannot wrap around.
+    """
+    from PIL import Image
+
+    out: list = []
+    for item in images:
+        if isinstance(item, Image.Image):
+            out.append(item)
+            continue
+        arr = np.asarray(item)
+        while arr.ndim > 4:  # [batch, frames, H, W, C] -> [frames, H, W, C]
+            arr = arr[0]
+        if arr.dtype != np.uint8:  # frames share a dtype/range; convert once
+            if arr.size and arr.max() <= 1.0:  # size guard: max() fails on empty
+                arr = arr.clip(0, 1) * 255
+            arr = np.clip(np.round(arr), 0, 255).astype(np.uint8)
+        frames = arr if arr.ndim == 4 else arr[None]  # -> [N, H, W, C]
+        out.extend(Image.fromarray(frame) for frame in frames)
+    return out
+
+
 def frames_to_numpy(images: list) -> np.ndarray:
     """Convert a list of PIL Images to a numpy array suitable for video encoding.
 
@@ -177,6 +204,29 @@ def encode_to_mp4(
         raise RuntimeError(f"Video encoding failed: {e}") from e
 
 
+def _rgb_to_yuv420p(frames: np.ndarray) -> bytes:
+    """Pack RGB frames (N, H, W, 3) into planar YUV420p bytes (Y, U, V per frame).
+
+    Uses full-range BT.601 coefficients in numpy so ffmpeg never has to run
+    its own RGB->YUV conversion (the in-tree LGPL ffmpeg's libswscale path
+    collapses chroma; greens render as magenta). H and W must be even.
+    """
+    red, green, blue = (frames[..., c].astype(np.float32) for c in range(3))
+    count, height, width = red.shape
+
+    def _to_u8(plane: np.ndarray) -> np.ndarray:
+        return plane.round().clip(0, 255).astype(np.uint8)
+
+    def _subsample(plane: np.ndarray) -> np.ndarray:
+        # 4:2:0 -- box-average each 2x2 chroma block
+        return plane.reshape(count, height // 2, 2, width // 2, 2).mean((2, 4))
+
+    luma = _to_u8(0.299 * red + 0.587 * green + 0.114 * blue)
+    cb = _to_u8(_subsample(-0.168736 * red - 0.331264 * green + 0.5 * blue + 128.0))
+    cr = _to_u8(_subsample(0.5 * red - 0.418688 * green - 0.081312 * blue + 128.0))
+    return b"".join(p.tobytes() for yuv in zip(luma, cb, cr) for p in yuv)
+
+
 def encode_to_video_bytes(
     frames: np.ndarray,
     fps: int = 16,
@@ -194,51 +244,43 @@ def encode_to_video_bytes(
         Encoded video as bytes.
 
     Raises:
-        ImportError: If imageio is not available.
         RuntimeError: If encoding fails.
     """
-    try:
-        import imageio.v3 as iio
-    except ImportError:
+    import subprocess
+    import tempfile
+
+    codec = {"mp4": "h264_nvenc", "webm": "libvpx-vp9"}.get(output_format)
+    if codec is None:
+        raise ValueError(f"No codec specified for response format: {output_format}")
+
+    frames = np.asarray(frames)
+    if frames.ndim != 4 or frames.shape[-1] != 3:
+        raise ValueError(f"Expected frames of shape (N, H, W, 3), got {frames.shape}")
+    n, h, w, _ = frames.shape
+    h, w = h & ~1, w & ~1  # yuv420p needs even dimensions
+    frames = frames[:, :h, :w, :]
+
+    logger.info(f"Encoding {n} frames to {output_format} bytes at {fps} fps")
+
+    # Pre-convert RGB->YUV420p in numpy and feed planar YUV directly, bypassing
+    # the in-tree ffmpeg's broken libswscale RGB->YUV path.
+    yuv = _rgb_to_yuv420p(frames)
+    ffmpeg = os.environ.get("IMAGEIO_FFMPEG_EXE", "ffmpeg")
+    cmd = [
+        ffmpeg, "-y", "-v", "error",
+        "-f", "rawvideo", "-pix_fmt", "yuv420p", "-s", f"{w}x{h}",
+        "-r", str(fps), "-color_range", "pc", "-i", "-",
+        "-c:v", codec, "-pix_fmt", "yuv420p", "-color_range", "pc",
+    ]
+    with tempfile.NamedTemporaryFile(suffix=f".{output_format}") as tmp:
         try:
-            import imageio as iio  # type: ignore[no-redef]
-        except ImportError:
-            raise ImportError(
-                "imageio is required for video encoding. "
-                "Install with: pip install imageio[ffmpeg]"
-            )
-
-    logger.info(f"Encoding {len(frames)} frames to {output_format} bytes at {fps} fps")
-
-    try:
-        buffer = io.BytesIO()
-
-        kwargs: dict = {"fps": fps}
-        if output_format == "webm":
-            kwargs["codec"] = "libvpx-vp9"
-        elif output_format == "mp4":
-            kwargs["codec"] = "h264_nvenc"
-        else:
-            raise ValueError(f"No codec specified for response format: {output_format}")
-
-        if hasattr(iio, "imwrite"):
-            # v3 API
-            iio.imwrite(buffer, frames, extension=f".{output_format}", **kwargs)
-        else:
-            # v2 API
-            writer = iio.get_writer(  # type: ignore[attr-defined]
-                buffer, format="FFMPEG", mode="I", **kwargs
-            )
-            try:
-                for frame in frames:
-                    writer.append_data(frame)
-            finally:
-                writer.close()
-
-        video_bytes = buffer.getvalue()
-        logger.info(f"Encoded video to {len(video_bytes)} bytes")
-        return video_bytes
-
-    except Exception as e:
-        logger.error(f"Failed to encode video to bytes: {e}")
-        raise RuntimeError(f"Video encoding to bytes failed: {e}") from e
+            subprocess.run(cmd + [tmp.name], input=yuv, check=True, capture_output=True)
+        except (OSError, subprocess.CalledProcessError) as e:  # OSError: ffmpeg not runnable
+            stderr = getattr(e, "stderr", None)  # absent on OSError
+            detail = stderr.decode(errors="replace") if stderr else str(e)
+            raise RuntimeError(f"Video encoding to bytes failed: {detail}") from e
+        tmp.seek(0)
+        video_bytes = tmp.read()
+
+    logger.info(f"Encoded video to {len(video_bytes)} bytes")
+    return video_bytes
diff --git a/components/src/dynamo/vllm/omni/output_formatter.py b/components/src/dynamo/vllm/omni/output_formatter.py
index 9816bd3f69a5..b842a28e133c 100644
--- a/components/src/dynamo/vllm/omni/output_formatter.py
+++ b/components/src/dynamo/vllm/omni/output_formatter.py
@@ -11,7 +11,6 @@
 import asyncio
 import base64
 import logging
-import tempfile
 import time
 import uuid
 from io import BytesIO
@@ -20,7 +19,6 @@
 import numpy as np
 import soundfile as sf
 import torch
-from diffusers.utils.export_utils import export_to_video
 
 from dynamo.common.protocols.audio_protocol import AudioData, NvAudioSpeechResponse
 from dynamo.common.protocols.image_protocol import ImageData, NvImagesResponse
@@ -28,7 +26,11 @@
 from dynamo.common.storage import upload_to_fs
 from dynamo.common.utils.engine_response import normalize_finish_reason
 from dynamo.common.utils.output_modalities import RequestType
-from dynamo.common.utils.video_utils import normalize_video_frames
+from dynamo.common.utils.video_utils import (
+    encode_to_video_bytes,
+    frames_to_numpy,
+    normalize_image_frames,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -139,12 +141,16 @@ async def _encode_video(
             )
         try:
             start_time = time.time()
-            frame_list = normalize_video_frames(images)
-            with tempfile.NamedTemporaryFile(
-                suffix=f".{output_format}", delete=True
-            ) as tmp:
-                await asyncio.to_thread(export_to_video, frame_list, tmp.name, fps)
-                video_bytes = tmp.read()
+            # Encode through the shared LGPL-safe encoder: it uses the in-tree
+            # ffmpeg's h264_nvenc (NVIDIA HW) encoder rather than the GPL libx264
+            # that diffusers.export_to_video would default to for MP4.
+            frames_np = frames_to_numpy(normalize_image_frames(images))
+            video_bytes = await asyncio.to_thread(
+                encode_to_video_bytes,
+                frames_np,
+                fps=fps,
+                output_format=output_format,
+            )
 
             if response_format == "b64_json":
                 video_data = VideoData(
@@ -241,7 +247,7 @@ async def _prepare_images(
         self, images: list, request_id: str, response_format: Optional[str] = None
     ) -> list:
         outlist = []
-        for img in images:
+        for img in normalize_image_frames(images):
             buf = BytesIO()
             img.save(buf, format="PNG")
             image_bytes = buf.getvalue()
diff --git a/components/src/dynamo/vllm/tests/omni/test_output_formatter.py b/components/src/dynamo/vllm/tests/omni/test_output_formatter.py
index 908124e0f805..48dc9073ab96 100644
--- a/components/src/dynamo/vllm/tests/omni/test_output_formatter.py
+++ b/components/src/dynamo/vllm/tests/omni/test_output_formatter.py
@@ -224,7 +224,7 @@ async def test_error_returns_failed_status(self):
 
         f = _make_diffusion_formatter()
         with patch(
-            "dynamo.vllm.omni.output_formatter.normalize_video_frames",
+            "dynamo.vllm.omni.output_formatter.normalize_image_frames",
             side_effect=RuntimeError("boom"),
         ):
             chunk = await f._encode_video([MagicMock()], "req-1", fps=16)
@@ -510,10 +510,17 @@ def _patches(self):
 
         return (
             _patch(
-                "dynamo.vllm.omni.output_formatter.normalize_video_frames",
+                "dynamo.vllm.omni.output_formatter.normalize_image_frames",
                 return_value=[MagicMock()],
             ),
-            _patch("dynamo.vllm.omni.output_formatter.export_to_video"),
+            _patch(
+                "dynamo.vllm.omni.output_formatter.frames_to_numpy",
+                return_value=MagicMock(),
+            ),
+            _patch(
+                "dynamo.vllm.omni.output_formatter.encode_to_video_bytes",
+                return_value=b"\x00\x01\x02\x03",
+            ),
             _patch(
                 "dynamo.vllm.omni.output_formatter.upload_to_fs",
                 return_value="http://x/v.mp4",
@@ -533,8 +540,8 @@ async def test_video_url_response_format(self):
         stage = MagicMock()
         stage.images = [MagicMock()]
 
-        p1, p2, p3, p4 = self._patches()
-        with p1, p2, p3 as mock_upload, p4:
+        p1, p2, p3, p4, p5 = self._patches()
+        with p1, p2, p3, p4 as mock_upload, p5:
             result = await f.format(
                 stage,
                 "r5",
@@ -560,8 +567,8 @@ async def test_video_b64_response_format(self):
         stage = MagicMock()
         stage.images = [MagicMock()]
 
-        p1, p2, p3, p4 = self._patches()
-        with p1, p2, p3 as mock_upload, p4:
+        p1, p2, p3, p4, p5 = self._patches()
+        with p1, p2, p3, p4 as mock_upload, p5:
             result = await f.format(
                 stage,
                 "r6",
@@ -587,8 +594,8 @@ async def test_video_default_response_format_is_url(self):
         stage = MagicMock()
         stage.images = [MagicMock()]
 
-        p1, p2, p3, p4 = self._patches()
-        with p1, p2, p3 as mock_upload, p4:
+        p1, p2, p3, p4, p5 = self._patches()
+        with p1, p2, p3, p4 as mock_upload, p5:
             result = await f.format(
                 stage, "r7", request_type=RequestType.VIDEO_GENERATION, fps=16
             )
diff --git a/container/templates/vllm_runtime.Dockerfile b/container/templates/vllm_runtime.Dockerfile
index 2715c9f7c6f7..73ac34d065f5 100644
--- a/container/templates/vllm_runtime.Dockerfile
+++ b/container/templates/vllm_runtime.Dockerfile
@@ -165,14 +165,17 @@ RUN --mount=type=cache,target=/root/.cache/uv,sharing=locked \
         if [ -n "$GMS_WHEEL" ]; then uv pip install {{ pip_target }} --no-deps "$GMS_WHEEL"; fi; \
     fi
 
-# vLLM-Omni's audio helpers shell out to SoX, and the launch script examples use
-# jq for readable curl output just like the upstream omni image does.
+# The launch script examples use jq for readable curl output just like the
+# upstream omni image does.
+#
+# NOTE: vLLM-Omni no longer shells out to the GPL SoX binary — its audio
+# normalization is a pure-numpy peak_normalize() (vllm_omni/utils/audio.py), so
+# sox / libsox-fmt-all (and their GPL/UNKNOWN codec deps) are intentionally not
+# installed here.
 RUN set -eux; \
     apt-get update; \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        jq \
-        sox \
-        libsox-fmt-all; \
+        jq; \
     rm -rf /var/lib/apt/lists/*
 
 # Layer the released vLLM-Omni package matching the pinned upstream ref while
@@ -194,22 +197,53 @@ RUN uv pip uninstall triton && \
 {% endif %}
 {% endif %}
 
-{% if context.vllm.enable_media_ffmpeg == "true" %}
-# Copy ffmpeg libraries from wheel_builder (requires root, runs before USER dynamo)
+# The upstream vllm/vllm-openai base image ships a GPL/GPL-3.0 ffmpeg built
+# against libx264/libx265/libmp3lame. Purge that entire apt codec stack and
+# replace it with the LGPL-only in-tree ffmpeg built in wheel_builder
+# (--disable-gpl --disable-nonfree; H.264 via NVENC, VP9 via libvpx). PyAV,
+# torchaudio, torchvision, soundfile and Pillow all bundle their own libraries
+# and do not link the system ffmpeg/codecs, so removing them is safe. dpkg-query
+# prefix matching tolerates version suffixes (libavcodec58 vs 60); libav*/libsw*
+# names are explicit so e.g. libavahi-* is spared. autoremove sweeps orphans.
+#
+# CRITICAL: the base image marks the CUDA math libs (libcublas/libcusolver/
+# libcusparse) auto-installed, and the torch wheels here ship NO bundled cublas
+# — torch loads the system copies. A bare autoremove would delete them and break
+# GPU inference, so pin every CUDA/NVIDIA lib as manually-installed first.
+RUN set -eux; \
+    keep=$(dpkg-query -W -f='${Package}\n' 2>/dev/null \
+        | grep -E '^(libcu|libnv|libnccl|cuda)' || true); \
+    if [ -n "$keep" ]; then apt-mark manual $keep >/dev/null; fi; \
+    purge=$(dpkg-query -W -f='${Package}\n' 2>/dev/null \
+        | grep -E '^(ffmpeg|libav(codec|device|filter|format|resample|util)|libsw(resample|scale)|libpostproc|libx264|libx265|libmp3lame|libaom|libdav1d|libvpx|libtheora|libvorbis|libopus|libsoxr)' \
+        || true); \
+    if [ -n "$purge" ]; then \
+        DEBIAN_FRONTEND=noninteractive apt-get purge -y $purge; \
+    fi; \
+    DEBIAN_FRONTEND=noninteractive apt-get autoremove -y --purge; \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy the LGPL ffmpeg from wheel_builder: versioned shared libs (libav*.so*,
+# libsw*.so*) plus the LGPL CLI binary that IMAGEIO_FFMPEG_EXE points at. Only
+# the libvpx copy is best-effort (grouped with its own `|| true`); every other
+# step must fail the build. Ungated by enable_media_ffmpeg: the base GPL ffmpeg
+# was just purged, so the omni video-export path needs this CLI to encode with.
 RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
     mkdir -p /usr/local/lib/pkgconfig && \
     cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
-    cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
+    cp -nL /tmp/usr/local/lib/libav*.so* /tmp/usr/local/lib/libsw*.so* /usr/local/lib/ && \
+    (cp -nL /tmp/usr/local/lib/lib*vpx*.so* /usr/local/lib/ 2>/dev/null || true) && \
     cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
-    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
-{% endif %}
+    cp -nL /tmp/usr/local/bin/ffmpeg /usr/local/bin/ffmpeg && \
+    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/ && \
+    ldconfig
+ENV IMAGEIO_FFMPEG_EXE=/usr/local/bin/ffmpeg
 
-# Replace the upstream vllm/vllm-openai image's imageio-ffmpeg (which ships
-# a GPL-encumbered prebuilt ffmpeg binary) with a source install that leaves
-# no binary on disk. vLLM-Omni uses diffusers.export_to_video and doesn't
-# invoke imageio-ffmpeg, so no IMAGEIO_FFMPEG_EXE is needed — this is
-# purely to clear the GPL binary. The --no-binary directive lives in the
-# requirements file itself.
+# Replace the upstream vllm/vllm-openai image's imageio-ffmpeg (which ships a
+# GPL-encumbered prebuilt ffmpeg binary in <site-packages>/imageio_ffmpeg/binaries/)
+# with a source install that leaves no binary on disk. IMAGEIO_FFMPEG_EXE (set
+# above) points imageio at the LGPL CLI copied from wheel_builder. The
+# --no-binary directive lives in the requirements file itself.
 RUN --mount=type=bind,source=./container/deps/requirements.vllm.txt,target=/tmp/requirements.vllm.txt \
     --mount=type=cache,target=/root/.cache/uv,sharing=locked \
     export UV_CACHE_DIR=/root/.cache/uv && \