diff --git a/components/src/dynamo/common/tests/test_video_utils.py b/components/src/dynamo/common/tests/test_video_utils.py index fddb373412af..4285b50c7b64 100644 --- a/components/src/dynamo/common/tests/test_video_utils.py +++ b/components/src/dynamo/common/tests/test_video_utils.py @@ -26,131 +26,144 @@ def make_frames(n=3, h=8, w=8) -> np.ndarray: class TestEncodeToVideoBytes: - """Tests for encode_to_video_bytes().""" - - def _mock_iio_v3(self): - """Return a mock that looks like imageio.v3 (has imwrite).""" - iio = MagicMock() - iio.imwrite = MagicMock() - return iio - - def _mock_iio_v2(self): - """Return a mock that looks like imageio v2 (no imwrite, has get_writer).""" - iio = MagicMock(spec=[]) # no attributes by default - writer = MagicMock() - iio.get_writer = MagicMock(return_value=writer) - return iio, writer - - def test_mp4_selects_h264_nvenc_codec(self): + """Tests for encode_to_video_bytes(). + + encode_to_video_bytes pre-converts RGB->YUV420p in numpy and shells out to + ffmpeg (feeding planar YUV on stdin) to sidestep the in-tree LGPL ffmpeg's + broken libswscale RGB->YUV path. These tests mock subprocess.run + the temp + file so no real ffmpeg is invoked. + """ + + def _patch_ffmpeg(self, read_bytes=b"video-bytes"): + """Patch subprocess.run (success) and the output tempfile. + + Returns (run_patch, tempfile_patch); the run_patch's mock is what tests + assert against. 
+ """ + run_patch = patch("subprocess.run", MagicMock()) + tmp = MagicMock() + tmp.read.return_value = read_bytes + ntf_cm = MagicMock() + ntf_cm.__enter__.return_value = tmp + tempfile_patch = patch( + "tempfile.NamedTemporaryFile", MagicMock(return_value=ntf_cm) + ) + return run_patch, tempfile_patch + + def test_mp4_uses_h264_nvenc(self): from dynamo.common.utils.video_utils import encode_to_video_bytes - iio = self._mock_iio_v3() - with patch("dynamo.common.utils.video_utils.io") as mock_io, patch( - "imageio.v3", iio, create=True - ), patch.dict("sys.modules", {"imageio.v3": iio}): - buf = MagicMock() - buf.getvalue.return_value = b"fake-mp4" - mock_io.BytesIO.return_value = buf - + run_patch, tempfile_patch = self._patch_ffmpeg() + with run_patch as mock_run, tempfile_patch: encode_to_video_bytes(make_frames(), fps=8, output_format="mp4") - iio.imwrite.assert_called_once() - _, kwargs = iio.imwrite.call_args - assert kwargs.get("codec") == "h264_nvenc" - assert kwargs.get("fps") == 8 + cmd = mock_run.call_args[0][0] + assert "h264_nvenc" in cmd + assert mock_run.call_args[1]["check"] is True - def test_webm_selects_libvpx_vp9_codec(self): + def test_webm_uses_libvpx_vp9(self): from dynamo.common.utils.video_utils import encode_to_video_bytes - iio = self._mock_iio_v3() - with patch("dynamo.common.utils.video_utils.io") as mock_io, patch( - "imageio.v3", iio, create=True - ), patch.dict("sys.modules", {"imageio.v3": iio}): - buf = MagicMock() - buf.getvalue.return_value = b"fake-webm" - mock_io.BytesIO.return_value = buf - + run_patch, tempfile_patch = self._patch_ffmpeg() + with run_patch as mock_run, tempfile_patch: encode_to_video_bytes(make_frames(), fps=16, output_format="webm") - iio.imwrite.assert_called_once() - _, kwargs = iio.imwrite.call_args - assert kwargs.get("codec") == "libvpx-vp9" + assert "libvpx-vp9" in mock_run.call_args[0][0] - def test_mp4_passes_extension_to_imwrite(self): + def test_unsupported_format_raises_value_error(self): from 
dynamo.common.utils.video_utils import encode_to_video_bytes - iio = self._mock_iio_v3() - with patch("dynamo.common.utils.video_utils.io") as mock_io, patch( - "imageio.v3", iio, create=True - ), patch.dict("sys.modules", {"imageio.v3": iio}): - buf = MagicMock() - buf.getvalue.return_value = b"bytes" - mock_io.BytesIO.return_value = buf + with pytest.raises(ValueError, match="No codec"): + encode_to_video_bytes(make_frames(), output_format="avi") - encode_to_video_bytes(make_frames(), output_format="mp4") + def test_bad_shape_raises_value_error(self): + from dynamo.common.utils.video_utils import encode_to_video_bytes - _, kwargs = iio.imwrite.call_args - assert kwargs.get("extension") == ".mp4" + with pytest.raises(ValueError, match="Expected frames of shape"): + encode_to_video_bytes( + np.zeros((3, 8, 8), dtype=np.uint8), output_format="mp4" + ) + + def test_subprocess_failure_raises_runtime_error(self): + import subprocess - def test_webm_passes_extension_to_imwrite(self): from dynamo.common.utils.video_utils import encode_to_video_bytes - iio = self._mock_iio_v3() - with patch("dynamo.common.utils.video_utils.io") as mock_io, patch( - "imageio.v3", iio, create=True - ), patch.dict("sys.modules", {"imageio.v3": iio}): - buf = MagicMock() - buf.getvalue.return_value = b"bytes" - mock_io.BytesIO.return_value = buf + err = subprocess.CalledProcessError(1, "ffmpeg", stderr=b"boom") + _, tempfile_patch = self._patch_ffmpeg() + with patch("subprocess.run", MagicMock(side_effect=err)), tempfile_patch: + with pytest.raises(RuntimeError, match="Video encoding to bytes failed"): + encode_to_video_bytes(make_frames(), output_format="mp4") - encode_to_video_bytes(make_frames(), output_format="webm") + def test_returns_file_bytes(self): + from dynamo.common.utils.video_utils import encode_to_video_bytes - _, kwargs = iio.imwrite.call_args - assert kwargs.get("extension") == ".webm" + run_patch, tempfile_patch = self._patch_ffmpeg(read_bytes=b"\x00\x01\x02") + with 
run_patch, tempfile_patch: + result = encode_to_video_bytes(make_frames(), output_format="mp4") - def test_unsupported_format_raises_value_error(self): - from dynamo.common.utils.video_utils import encode_to_video_bytes + assert result == b"\x00\x01\x02" - iio = self._mock_iio_v3() - with patch("dynamo.common.utils.video_utils.io") as mock_io, patch( - "imageio.v3", iio, create=True - ), patch.dict("sys.modules", {"imageio.v3": iio}): - mock_io.BytesIO.return_value = MagicMock() - # ValueError is wrapped into RuntimeError by the except block - with pytest.raises(RuntimeError, match="Video encoding to bytes failed"): - encode_to_video_bytes(make_frames(), output_format="avi") +# --------------------------------------------------------------------------- +# normalize_image_frames +# --------------------------------------------------------------------------- - def test_returns_bytes_from_buffer(self): - from dynamo.common.utils.video_utils import encode_to_video_bytes - expected = b"\x00\x01\x02" - iio = self._mock_iio_v3() - with patch("dynamo.common.utils.video_utils.io") as mock_io, patch( - "imageio.v3", iio, create=True - ), patch.dict("sys.modules", {"imageio.v3": iio}): - buf = MagicMock() - buf.getvalue.return_value = expected - mock_io.BytesIO.return_value = buf +class TestNormalizeImageFrames: + """Tests for normalize_image_frames() — flattens DiffusionFormatter image + inputs to PIL. 
Image pipelines usually emit PIL Images; the Cosmos3 native + pipeline emits 5D numpy ``[B, F, H, W, C]``.""" - result = encode_to_video_bytes(make_frames(), output_format="mp4") + def test_pil_inputs_returned_by_identity(self): + """PIL inputs must pass through without conversion or copy.""" + from PIL import Image - assert result == expected + from dynamo.common.utils.video_utils import normalize_image_frames - def test_v2_api_fallback_writes_all_frames(self): - """When imageio.v3.imwrite is absent, falls back to get_writer loop.""" - from dynamo.common.utils.video_utils import encode_to_video_bytes + a = Image.new("RGB", (4, 4), (255, 0, 0)) + b = Image.new("RGB", (4, 4), (0, 255, 0)) + out = normalize_image_frames([a, b]) + + assert len(out) == 2 + assert out[0] is a and out[1] is b + + def test_uint8_hwc_numpy_preserves_pixels(self): + from PIL import Image + + from dynamo.common.utils.video_utils import normalize_image_frames + + arr = np.full((4, 4, 3), 7, dtype=np.uint8) + out = normalize_image_frames([arr]) + + assert len(out) == 1 + assert isinstance(out[0], Image.Image) + assert out[0].size == (4, 4) # PIL is (W, H) + assert np.asarray(out[0])[0, 0].tolist() == [7, 7, 7] + + def test_cosmos3_5d_strips_batch_and_preserves_frame_order(self): + """[B, F, H, W, C] collapses to F PIL frames in order. 
Distinct + per-frame content guards against wrong-axis indexing regressions.""" + from dynamo.common.utils.video_utils import normalize_image_frames + + arr = np.zeros((1, 3, 4, 4, 3), dtype=np.uint8) + arr[0, 0] = 10 # frame 0 fill + arr[0, 1] = 20 # frame 1 fill + arr[0, 2] = 30 # frame 2 fill + + out = normalize_image_frames([arr]) + + assert len(out) == 3 + assert np.asarray(out[0])[0, 0, 0] == 10 + assert np.asarray(out[1])[0, 0, 0] == 20 + assert np.asarray(out[2])[0, 0, 0] == 30 - iio_v2, writer = self._mock_iio_v2() - with patch("dynamo.common.utils.video_utils.io") as mock_io, patch( - "imageio.v3", iio_v2, create=True - ), patch.dict("sys.modules", {"imageio.v3": iio_v2}): - buf = MagicMock() - buf.getvalue.return_value = b"v2-bytes" - mock_io.BytesIO.return_value = buf + def test_float_zero_to_one_scaled_to_uint8(self): + """float32 [0, 1] inputs must be rescaled to uint8 [0, 255].""" + from dynamo.common.utils.video_utils import normalize_image_frames - frames = make_frames(n=4) - encode_to_video_bytes(frames, output_format="mp4") + arr = np.full((4, 4, 3), 0.5, dtype=np.float32) + out = normalize_image_frames([arr]) - assert writer.append_data.call_count == 4 - writer.close.assert_called_once() + # 0.5 * 255 = 127.5; numpy's banker's rounding yields exactly 128. + assert np.asarray(out[0])[0, 0, 0] == 128 diff --git a/components/src/dynamo/common/utils/video_utils.py b/components/src/dynamo/common/utils/video_utils.py index 347df37e4bcd..ebeeac7d9baa 100644 --- a/components/src/dynamo/common/utils/video_utils.py +++ b/components/src/dynamo/common/utils/video_utils.py @@ -7,7 +7,6 @@ video frames to MP4 format. """ -import io import logging import os from typing import Tuple @@ -82,6 +81,34 @@ def normalize_video_frames(images: list) -> list: return list(frames) +def normalize_image_frames(images: list) -> list: + """Normalize stage_output.images into a flat list of PIL Images. + + Image diffusion pipelines usually return PIL Images, but some (e.g. 
the
+    Cosmos3 native pipeline) return numpy arrays shaped ``[batch, frames, H, W,
+    C]`` even for single images. Extra leading dims are stripped by indexing [0]
+    (first batch entry only); frames become PIL Images; PIL inputs pass through.
+    """
+    from PIL import Image
+
+    out: list = []
+    for item in images:
+        if isinstance(item, Image.Image):
+            out.append(item)
+            continue
+        arr = np.asarray(item)
+        while arr.ndim > 4:  # [batch, frames, H, W, C] -> [frames, H, W, C]
+            arr = arr[0]
+        if arr.dtype != np.uint8:  # frames share a dtype/range; convert once
+            unit_range = arr.size > 0 and arr.max() <= 1.0  # max() raises if empty
+            scaled = (arr.clip(0, 1) * 255).round() if unit_range else arr.clip(0, 255)
+            arr = scaled.astype(np.uint8)  # clip first: out-of-range casts wrap
+        frames = arr if arr.ndim == 4 else arr[None]  # -> [N, H, W, C]
+        for frame in frames:
+            out.append(Image.fromarray(frame))
+    return out
+
+
 def frames_to_numpy(images: list) -> np.ndarray:
     """Convert a list of PIL Images to a numpy array suitable for video encoding.
 
@@ -177,6 +204,29 @@ def encode_to_mp4(
         raise RuntimeError(f"Video encoding failed: {e}") from e
 
 
+def _rgb_to_yuv420p(frames: np.ndarray) -> bytes:
+    """Convert RGB frames (N, H, W, 3) uint8 to planar YUV420p bytes.
+
+    Done in numpy (BT.601, full range) so ffmpeg never performs the RGB->YUV
+    conversion itself: the in-tree LGPL ffmpeg's libswscale RGB->YUV path is
+    broken and collapses chroma (greens render as magenta). H and W must be even.
+    """
+    rgb = frames.astype(np.float32)
+    r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2]
+    y = 0.299 * r + 0.587 * g + 0.114 * b
+    u = -0.168736 * r - 0.331264 * g + 0.5 * b + 128.0
+    v = 0.5 * r - 0.418688 * g - 0.081312 * b + 128.0
+    n, h, w = y.shape
+    y = y.round().clip(0, 255).astype(np.uint8)
+    # 4:2:0 -- box-average each 2x2 chroma block
+    u = u.reshape(n, h // 2, 2, w // 2, 2).mean((2, 4)).round().clip(0, 255).astype(np.uint8)
+    v = v.reshape(n, h // 2, 2, w // 2, 2).mean((2, 4)).round().clip(0, 255).astype(np.uint8)
+    # Emit frame by frame as Y plane, then U, then V (planar yuv420p order);
+    # one join avoids repeated bytearray growth plus a final bytes() copy.
+    planes = (p[i].tobytes() for i in range(n) for p in (y, u, v))
+    return b"".join(planes)
+
+
 def encode_to_video_bytes(
     frames: np.ndarray,
     fps: int = 16,
@@ -194,51 +244,51 @@ def encode_to_video_bytes(
         Encoded video as bytes.
 
     Raises:
-        ImportError: If imageio is not available.
-        RuntimeError: If encoding fails.
+        ValueError: If ``output_format`` has no codec mapping, or ``frames`` is
+            not a non-empty (N, H, W, 3) array of at least 2x2 pixels.
+        RuntimeError: If ffmpeg cannot be launched or exits with an error.
     """
-    try:
-        import imageio.v3 as iio
-    except ImportError:
+    import subprocess
+    import tempfile
+
+    codec = {"mp4": "h264_nvenc", "webm": "libvpx-vp9"}.get(output_format)
+    if codec is None:
+        raise ValueError(f"No codec specified for response format: {output_format}")
+
+    frames = np.asarray(frames)
+    if frames.ndim != 4 or frames.shape[-1] != 3:
+        raise ValueError(f"Expected frames of shape (N, H, W, 3), got {frames.shape}")
+    n, h, w, _ = frames.shape
+    h, w = h & ~1, w & ~1  # yuv420p needs even dimensions
+    if n == 0 or h == 0 or w == 0:
+        # Fail fast instead of handing ffmpeg an empty or 0x0 raw stream.
+        raise ValueError(
+            f"Need at least one frame of 2x2 pixels or larger, got {frames.shape}"
+        )
+    frames = frames[:, :h, :w, :]
+
+    logger.info(f"Encoding {n} frames to {output_format} bytes at {fps} fps")
+
+    # Pre-convert RGB->YUV420p in numpy and feed planar YUV directly, bypassing
+    # the in-tree ffmpeg's broken libswscale RGB->YUV path.
+    yuv = _rgb_to_yuv420p(frames)
+    ffmpeg = os.environ.get("IMAGEIO_FFMPEG_EXE", "ffmpeg")
+    cmd = [
+        ffmpeg, "-y", "-v", "error",
+        "-f", "rawvideo", "-pix_fmt", "yuv420p", "-s", f"{w}x{h}",
+        "-r", str(fps), "-color_range", "pc", "-i", "-",
+        "-c:v", codec, "-pix_fmt", "yuv420p", "-color_range", "pc",
+    ]
+    with tempfile.NamedTemporaryFile(suffix=f".{output_format}") as tmp:
         try:
-            import imageio as iio  # type: ignore[no-redef]
-        except ImportError:
-            raise ImportError(
-                "imageio is required for video encoding. "
-                "Install with: pip install imageio[ffmpeg]"
-            )
-
-    logger.info(f"Encoding {len(frames)} frames to {output_format} bytes at {fps} fps")
-
-    try:
-        buffer = io.BytesIO()
-
-        kwargs: dict = {"fps": fps}
-        if output_format == "webm":
-            kwargs["codec"] = "libvpx-vp9"
-        elif output_format == "mp4":
-            kwargs["codec"] = "h264_nvenc"
-        else:
-            raise ValueError(f"No codec specified for response format: {output_format}")
-
-        if hasattr(iio, "imwrite"):
-            # v3 API
-            iio.imwrite(buffer, frames, extension=f".{output_format}", **kwargs)
-        else:
-            # v2 API
-            writer = iio.get_writer(  # type: ignore[attr-defined]
-                buffer, format="FFMPEG", mode="I", **kwargs
-            )
-            try:
-                for frame in frames:
-                    writer.append_data(frame)
-            finally:
-                writer.close()
-
-        video_bytes = buffer.getvalue()
-        logger.info(f"Encoded video to {len(video_bytes)} bytes")
-        return video_bytes
-
-    except Exception as e:
-        logger.error(f"Failed to encode video to bytes: {e}")
-        raise RuntimeError(f"Video encoding to bytes failed: {e}") from e
+            subprocess.run(cmd + [tmp.name], input=yuv, check=True, capture_output=True)
+        except subprocess.CalledProcessError as e:
+            stderr = (e.stderr or b"").decode(errors="replace")
+            raise RuntimeError(f"Video encoding to bytes failed: {stderr}") from e
+        except OSError as e:  # ffmpeg binary missing or not executable
+            raise RuntimeError(f"Video encoding to bytes failed: {e}") from e
+        tmp.seek(0)  # ffmpeg wrote via the path; read back through our handle
+        video_bytes = tmp.read()
+
+    logger.info(f"Encoded video to {len(video_bytes)} bytes")
+    return video_bytes
diff --git a/components/src/dynamo/vllm/omni/output_formatter.py
b/components/src/dynamo/vllm/omni/output_formatter.py index 9816bd3f69a5..b842a28e133c 100644 --- a/components/src/dynamo/vllm/omni/output_formatter.py +++ b/components/src/dynamo/vllm/omni/output_formatter.py @@ -11,7 +11,6 @@ import asyncio import base64 import logging -import tempfile import time import uuid from io import BytesIO @@ -20,7 +19,6 @@ import numpy as np import soundfile as sf import torch -from diffusers.utils.export_utils import export_to_video from dynamo.common.protocols.audio_protocol import AudioData, NvAudioSpeechResponse from dynamo.common.protocols.image_protocol import ImageData, NvImagesResponse @@ -28,7 +26,11 @@ from dynamo.common.storage import upload_to_fs from dynamo.common.utils.engine_response import normalize_finish_reason from dynamo.common.utils.output_modalities import RequestType -from dynamo.common.utils.video_utils import normalize_video_frames +from dynamo.common.utils.video_utils import ( + encode_to_video_bytes, + frames_to_numpy, + normalize_image_frames, +) logger = logging.getLogger(__name__) @@ -139,12 +141,16 @@ async def _encode_video( ) try: start_time = time.time() - frame_list = normalize_video_frames(images) - with tempfile.NamedTemporaryFile( - suffix=f".{output_format}", delete=True - ) as tmp: - await asyncio.to_thread(export_to_video, frame_list, tmp.name, fps) - video_bytes = tmp.read() + # Encode through the shared LGPL-safe encoder: it uses the in-tree + # ffmpeg's h264_nvenc (NVIDIA HW) encoder rather than the GPL libx264 + # that diffusers.export_to_video would default to for MP4. 
+ frames_np = frames_to_numpy(normalize_image_frames(images)) + video_bytes = await asyncio.to_thread( + encode_to_video_bytes, + frames_np, + fps=fps, + output_format=output_format, + ) if response_format == "b64_json": video_data = VideoData( @@ -241,7 +247,7 @@ async def _prepare_images( self, images: list, request_id: str, response_format: Optional[str] = None ) -> list: outlist = [] - for img in images: + for img in normalize_image_frames(images): buf = BytesIO() img.save(buf, format="PNG") image_bytes = buf.getvalue() diff --git a/components/src/dynamo/vllm/tests/omni/test_output_formatter.py b/components/src/dynamo/vllm/tests/omni/test_output_formatter.py index 908124e0f805..48dc9073ab96 100644 --- a/components/src/dynamo/vllm/tests/omni/test_output_formatter.py +++ b/components/src/dynamo/vllm/tests/omni/test_output_formatter.py @@ -224,7 +224,7 @@ async def test_error_returns_failed_status(self): f = _make_diffusion_formatter() with patch( - "dynamo.vllm.omni.output_formatter.normalize_video_frames", + "dynamo.vllm.omni.output_formatter.normalize_image_frames", side_effect=RuntimeError("boom"), ): chunk = await f._encode_video([MagicMock()], "req-1", fps=16) @@ -510,10 +510,17 @@ def _patches(self): return ( _patch( - "dynamo.vllm.omni.output_formatter.normalize_video_frames", + "dynamo.vllm.omni.output_formatter.normalize_image_frames", return_value=[MagicMock()], ), - _patch("dynamo.vllm.omni.output_formatter.export_to_video"), + _patch( + "dynamo.vllm.omni.output_formatter.frames_to_numpy", + return_value=MagicMock(), + ), + _patch( + "dynamo.vllm.omni.output_formatter.encode_to_video_bytes", + return_value=b"\x00\x01\x02\x03", + ), _patch( "dynamo.vllm.omni.output_formatter.upload_to_fs", return_value="http://x/v.mp4", @@ -533,8 +540,8 @@ async def test_video_url_response_format(self): stage = MagicMock() stage.images = [MagicMock()] - p1, p2, p3, p4 = self._patches() - with p1, p2, p3 as mock_upload, p4: + p1, p2, p3, p4, p5 = self._patches() + with 
p1, p2, p3, p4 as mock_upload, p5: result = await f.format( stage, "r5", @@ -560,8 +567,8 @@ async def test_video_b64_response_format(self): stage = MagicMock() stage.images = [MagicMock()] - p1, p2, p3, p4 = self._patches() - with p1, p2, p3 as mock_upload, p4: + p1, p2, p3, p4, p5 = self._patches() + with p1, p2, p3, p4 as mock_upload, p5: result = await f.format( stage, "r6", @@ -587,8 +594,8 @@ async def test_video_default_response_format_is_url(self): stage = MagicMock() stage.images = [MagicMock()] - p1, p2, p3, p4 = self._patches() - with p1, p2, p3 as mock_upload, p4: + p1, p2, p3, p4, p5 = self._patches() + with p1, p2, p3, p4 as mock_upload, p5: result = await f.format( stage, "r7", request_type=RequestType.VIDEO_GENERATION, fps=16 ) diff --git a/container/templates/vllm_runtime.Dockerfile b/container/templates/vllm_runtime.Dockerfile index 2715c9f7c6f7..73ac34d065f5 100644 --- a/container/templates/vllm_runtime.Dockerfile +++ b/container/templates/vllm_runtime.Dockerfile @@ -165,14 +165,17 @@ RUN --mount=type=cache,target=/root/.cache/uv,sharing=locked \ if [ -n "$GMS_WHEEL" ]; then uv pip install {{ pip_target }} --no-deps "$GMS_WHEEL"; fi; \ fi -# vLLM-Omni's audio helpers shell out to SoX, and the launch script examples use -# jq for readable curl output just like the upstream omni image does. +# The launch script examples use jq for readable curl output just like the +# upstream omni image does. +# +# NOTE: vLLM-Omni no longer shells out to the GPL SoX binary — its audio +# normalization is a pure-numpy peak_normalize() (vllm_omni/utils/audio.py), so +# sox / libsox-fmt-all (and their GPL/UNKNOWN codec deps) are intentionally not +# installed here. 
RUN set -eux; \ apt-get update; \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - jq \ - sox \ - libsox-fmt-all; \ + jq; \ rm -rf /var/lib/apt/lists/* # Layer the released vLLM-Omni package matching the pinned upstream ref while @@ -194,22 +197,53 @@ RUN uv pip uninstall triton && \ {% endif %} {% endif %} -{% if context.vllm.enable_media_ffmpeg == "true" %} -# Copy ffmpeg libraries from wheel_builder (requires root, runs before USER dynamo) +# The upstream vllm/vllm-openai base image ships a GPL/GPL-3.0 ffmpeg built +# against libx264/libx265/libmp3lame. Purge that entire apt codec stack and +# replace it with the LGPL-only in-tree ffmpeg built in wheel_builder +# (--disable-gpl --disable-nonfree; H.264 via NVENC, VP9 via libvpx). PyAV, +# torchaudio, torchvision, soundfile and Pillow all bundle their own libraries +# and do not link the system ffmpeg/codecs, so removing them is safe. dpkg-query +# keeps the purge robust across base-image/arch version suffixes (e.g. +# libavcodec58 vs 60), and autoremove then sweeps the now-orphaned media deps. +# +# CRITICAL: the base image marks the CUDA math libs (libcublas/libcusolver/ +# libcusparse) auto-installed, and the torch wheels here ship NO bundled cublas +# — torch loads the system copies. A bare autoremove would delete them and break +# GPU inference, so pin every CUDA/NVIDIA lib as manually-installed first. 
+RUN set -eux; \
+    keep=$(dpkg-query -W -f='${Package}\n' 2>/dev/null \
+        | grep -E '^(libcu|libnv|libnccl|cuda)' || true); \
+    if [ -n "$keep" ]; then apt-mark manual $keep >/dev/null; fi; \
+    purge=$(dpkg-query -W -f='${Package}\n' 2>/dev/null \
+        | grep -E '^(ffmpeg|libav[a-z]|libsw[a-z]|libpostproc|libx264|libx265|libmp3lame|libaom|libdav1d|libvpx|libtheora|libvorbis|libopus|libsoxr)' \
+        || true); \
+    if [ -n "$purge" ]; then \
+        DEBIAN_FRONTEND=noninteractive apt-get purge -y $purge; \
+    fi; \
+    DEBIAN_FRONTEND=noninteractive apt-get autoremove -y --purge; \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy the LGPL ffmpeg from wheel_builder: versioned shared libs (libav*.so*,
+# libsw*.so*) plus the LGPL CLI that imageio/diffusers target via
+# IMAGEIO_FFMPEG_EXE. Ungated by enable_media_ffmpeg: the base GPL ffmpeg was
+# just purged, so the omni video-export path always needs this CLI. The libvpx
+# copy is best-effort; it is grouped so `|| true` cannot mask earlier failures.
 RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
     mkdir -p /usr/local/lib/pkgconfig && \
     cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
-    cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
+    cp -nL /tmp/usr/local/lib/libav*.so* /tmp/usr/local/lib/libsw*.so* /usr/local/lib/ && \
+    ( cp -nL /tmp/usr/local/lib/lib*vpx*.so* /usr/local/lib/ 2>/dev/null || true ) && \
     cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
-    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
-{% endif %}
+    cp -nL /tmp/usr/local/bin/ffmpeg /usr/local/bin/ffmpeg && \
+    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/ && \
+    ldconfig
+ENV IMAGEIO_FFMPEG_EXE=/usr/local/bin/ffmpeg
 
-# Replace the upstream vllm/vllm-openai image's imageio-ffmpeg (which ships
-# a GPL-encumbered prebuilt ffmpeg binary) with a source install that leaves
-# no binary on disk.
vLLM-Omni uses diffusers.export_to_video and doesn't -# invoke imageio-ffmpeg, so no IMAGEIO_FFMPEG_EXE is needed — this is -# purely to clear the GPL binary. The --no-binary directive lives in the -# requirements file itself. +# Replace the upstream vllm/vllm-openai image's imageio-ffmpeg (which ships a +# GPL-encumbered prebuilt ffmpeg binary in /imageio_ffmpeg/binaries/) +# with a source install that leaves no binary on disk. IMAGEIO_FFMPEG_EXE (set +# above) points imageio at the LGPL CLI copied from wheel_builder. The +# --no-binary directive lives in the requirements file itself. RUN --mount=type=bind,source=./container/deps/requirements.vllm.txt,target=/tmp/requirements.vllm.txt \ --mount=type=cache,target=/root/.cache/uv,sharing=locked \ export UV_CACHE_DIR=/root/.cache/uv && \