From 871574456d10e9cbb3a32f0c35d243e3f5c4cf33 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Wed, 7 May 2025 20:11:59 +0800 Subject: [PATCH 1/5] fix default video io and add tests Signed-off-by: Isotr0py <2037008807@qq.com> --- tests/multimodal/test_utils.py | 16 ++++++++++++++++ vllm/multimodal/video.py | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py index ce1429fda943..8ba5b9630ce5 100644 --- a/tests/multimodal/test_utils.py +++ b/tests/multimodal/test_utils.py @@ -26,6 +26,11 @@ "https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png", ] +TEST_VIDEO_URLS = [ + "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4", + "https://opencv.org/wp-content/uploads/2025/02/Example-Video.mp4", +] + @pytest.fixture(scope="module") def url_images() -> dict[str, Image.Image]: @@ -134,6 +139,17 @@ async def test_fetch_image_local_files(image_url: str): f"file://{temp_dir}/../{os.path.basename(image_url)}") +@pytest.mark.asyncio +@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS) +@pytest.mark.parametrize("num_frames", [-1, 32, 1800]) +async def test_fetch_video_http(video_url: str, num_frames: int): + connector = MediaConnector() + + video_sync = connector.fetch_video(video_url, num_frames) + video_async = await connector.fetch_video_async(video_url, num_frames) + assert np.array_equal(video_sync, video_async) + + # Used for the next two tests related to `merge_and_sort_multimodal_metadata`. class TestCase(NamedTuple): mm_positions: "MultiModalPlaceholderDict" diff --git a/vllm/multimodal/video.py b/vllm/multimodal/video.py index 6d875a1c651e..f4b2ce65d13a 100644 --- a/vllm/multimodal/video.py +++ b/vllm/multimodal/video.py @@ -81,7 +81,8 @@ def load_bytes(cls, data: bytes, num_frames: int = -1) -> npt.NDArray: total_frames_num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) full_read = num_frames == -1 or total_frames_num < num_frames if full_read: - frame_idx = list(range(0, total_frames_num)) + num_frames = total_frames_num + frame_idx = list(range(0, num_frames)) else: uniform_sampled_frames = np.linspace(0, total_frames_num - 1, From 62c6a65c9871516e6f453f7c1ef824f82c624b40 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Wed, 7 May 2025 20:13:58 +0800 Subject: [PATCH 2/5] use small video for test Signed-off-by: Isotr0py <2037008807@qq.com> --- tests/multimodal/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py index 8ba5b9630ce5..8c2dc77f4338 100644 --- a/tests/multimodal/test_utils.py +++ b/tests/multimodal/test_utils.py @@ -28,7 +28,6 @@ TEST_VIDEO_URLS = [ "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4", - "https://opencv.org/wp-content/uploads/2025/02/Example-Video.mp4", ] From 6c8f72971df800070d78248089632e9813169bec Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Wed, 7 May 2025 20:27:36 +0800 Subject: [PATCH 3/5] add avi test case Signed-off-by: Isotr0py <2037008807@qq.com> --- tests/multimodal/test_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py index 8c2dc77f4338..d8870055001c 100644 --- a/tests/multimodal/test_utils.py +++ b/tests/multimodal/test_utils.py @@ -28,6 +28,7 @@ TEST_VIDEO_URLS = [ "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4", + "https://filesamples.com/samples/video/avi/sample_640x360.avi", ] @@ -144,8 +145,8 @@ async def test_fetch_image_local_files(image_url: str): async def test_fetch_video_http(video_url: str, num_frames: int): connector = MediaConnector() - video_sync = connector.fetch_video(video_url, num_frames) - video_async = await connector.fetch_video_async(video_url, num_frames) + video_sync = connector.fetch_video(video_url, num_frames=num_frames) + video_async = await connector.fetch_video_async(video_url, num_frames=num_frames) assert np.array_equal(video_sync, video_async) From 259c1650d6b7636ef682c80f45944c0c8b5ffd6a Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Wed, 7 May 2025 20:33:19 +0800 Subject: [PATCH 4/5] better error message Signed-off-by: Isotr0py <2037008807@qq.com> --- vllm/multimodal/video.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/multimodal/video.py b/vllm/multimodal/video.py index f4b2ce65d13a..72e9b65d763c 100644 --- a/vllm/multimodal/video.py +++ b/vllm/multimodal/video.py @@ -105,7 +105,8 @@ def load_bytes(cls, data: bytes, num_frames: int = -1) -> npt.NDArray: frames[i] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) i += 1 # we expect all frames loaded - assert i == num_frames + assert i == num_frames, (f"Expected reading {num_frames} frames, " + f"but only loaded {i} frames from video.") return frames From 0edd82bc96f40e47c62ace6cc240651704908d2f Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Wed, 7 May 2025 20:49:33 +0800 Subject: [PATCH 5/5] code format Signed-off-by: Isotr0py <2037008807@qq.com> --- tests/multimodal/test_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py index d8870055001c..478184c34b91 100644 --- a/tests/multimodal/test_utils.py +++ b/tests/multimodal/test_utils.py @@ -146,7 +146,8 @@ async def test_fetch_video_http(video_url: str, num_frames: int): connector = MediaConnector() video_sync = connector.fetch_video(video_url, num_frames=num_frames) - video_async = await connector.fetch_video_async(video_url, num_frames=num_frames) + video_async = await connector.fetch_video_async(video_url, + num_frames=num_frames) assert np.array_equal(video_sync, video_async)