Tests cleanup and move video code to video_utils

dangusev · dangusev · commit aa16c4980a40 · 2025-11-12T23:38:33.000+01:00
diff --git a/agents-core/vision_agents/core/utils/utils.py b/agents-core/vision_agents/core/utils/utils.py
@@ -1,14 +1,12 @@
 import asyncio
 import importlib.metadata
-import io
 import logging
 import os
 import re
 from dataclasses import dataclass
 from typing import Dict, Optional
 
 import httpx
-from PIL import Image
 
 logger = logging.getLogger(__name__)
 
@@ -132,31 +130,6 @@ def parse_instructions(text: str, base_dir: Optional[str] = None) -> Instruction
     )
 
 
-def frame_to_png_bytes(frame) -> bytes:
-    """
-    Convert a video frame to PNG bytes.
-
-    Args:
-        frame: Video frame object that can be converted to an image
-
-    Returns:
-        PNG bytes of the frame, or empty bytes if conversion fails
-    """
-    try:
-        if hasattr(frame, "to_image"):
-            img = frame.to_image()
-        else:
-            arr = frame.to_ndarray(format="rgb24")
-            img = Image.fromarray(arr)
-
-        buf = io.BytesIO()
-        img.save(buf, format="PNG")
-        return buf.getvalue()
-    except Exception as e:
-        logger.error(f"Error converting frame to PNG: {e}")
-        return b""
-
-
 def get_vision_agents_version() -> Optional[str]:
     """
     Get the installed vision-agents package version.
diff --git a/agents-core/vision_agents/core/utils/video_utils.py b/agents-core/vision_agents/core/utils/video_utils.py
@@ -4,6 +4,7 @@
 
 import av
 from PIL.Image import Resampling
+from PIL import Image
 
 
 def ensure_even_dimensions(frame: av.VideoFrame) -> av.VideoFrame:
@@ -61,3 +62,24 @@ def frame_to_jpeg_bytes(
     buf = io.BytesIO()
     resized.save(buf, "JPEG", quality=quality, optimize=True)
     return buf.getvalue()
+
+
+def frame_to_png_bytes(frame: av.VideoFrame) -> bytes:
+    """
+    Convert a video frame to PNG bytes.
+
+    Args:
+        frame: Video frame object that can be converted to an image
+
+    Returns:
+        PNG bytes of the frame; conversion errors propagate to the caller
+    """
+    if hasattr(frame, "to_image"):
+        img = frame.to_image()
+    else:
+        arr = frame.to_ndarray(format="rgb24")
+        img = Image.fromarray(arr)
+
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return buf.getvalue()
diff --git a/plugins/gemini/tests/test_gemini_realtime.py b/plugins/gemini/tests/test_gemini_realtime.py
@@ -5,26 +5,26 @@
 from vision_agents.core.tts.manual_test import play_pcm_with_ffplay
 from vision_agents.plugins.gemini import Realtime
 from vision_agents.core.llm.events import RealtimeAudioOutputEvent
-from vision_agents.core.utils.utils import frame_to_png_bytes
 from getstream.video.rtc import PcmData, AudioFormat
 
 # Load environment variables
 load_dotenv()
 
 
-class TestGeminiRealtime:
-    """Integration tests for Realtime2 connect flow"""
+@pytest.fixture
+async def realtime():
+    """Yield a Gemini Realtime instance and close it once the test is done"""
+    # Module-level fixture: it must not take `self`, or pytest would try to
+    # resolve `self` as another fixture and fail at setup.
+    realtime = Realtime(model="gemini-2.0-flash-exp")
+    try:
+        yield realtime
+    finally:
+        await realtime.close()
 
-    @pytest.fixture
-    async def realtime(self):
-        """Create and manage Realtime connection lifecycle"""
-        realtime = Realtime(
-            model="gemini-2.0-flash-exp",
-        )
-        try:
-            yield realtime
-        finally:
-            await realtime.close()
+
+class TestGeminiRealtime:
+    """Integration tests for Gemini Realtime connect flow"""
 
     @pytest.mark.integration
     async def test_simple_response_flow(self, realtime):
@@ -93,20 +93,3 @@ async def on_audio(event: RealtimeAudioOutputEvent):
         # Stop video sender
         await realtime._stop_watching_video_track()
         assert len(events) > 0
-
-    async def test_frame_to_png_bytes_with_bunny_video(self, bunny_video_track):
-        """Test that frame_to_png_bytes works with real bunny video frames"""
-        # Get a frame from the bunny video track
-        frame = await bunny_video_track.recv()
-        png_bytes = frame_to_png_bytes(frame)
-
-        # Verify we got PNG data
-        assert isinstance(png_bytes, bytes)
-        assert len(png_bytes) > 0
-
-        # Verify it's actually PNG data (PNG files start with specific bytes)
-        assert png_bytes.startswith(b"\x89PNG\r\n\x1a\n")
-
-        print(
-            f"Successfully converted bunny video frame to PNG: {len(png_bytes)} bytes"
-        )
diff --git a/plugins/gemini/vision_agents/plugins/gemini/gemini_realtime.py b/plugins/gemini/vision_agents/plugins/gemini/gemini_realtime.py
@@ -39,7 +39,7 @@
 from vision_agents.core.llm.llm import LLMResponseEvent
 from vision_agents.core.llm.llm_types import ToolSchema
 from vision_agents.core.processors import Processor
-from vision_agents.core.utils.utils import frame_to_png_bytes
+from vision_agents.core.utils.video_utils import frame_to_png_bytes
 from vision_agents.core.utils.video_forwarder import VideoForwarder
 
 logger = logging.getLogger(__name__)
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -2,15 +2,17 @@
 import os
 import tempfile
 
-import numpy as np
 import av
+import numpy as np
 import pytest
-
-from vision_agents.core.utils.utils import parse_instructions, Instructions
 from getstream.video.rtc.track_util import AudioFormat
-from vision_agents.core.utils.video_utils import ensure_even_dimensions
 from vision_agents.core.edge.types import PcmData
 from vision_agents.core.utils.logging import configure_default_logging
+from vision_agents.core.utils.utils import Instructions, parse_instructions
+from vision_agents.core.utils.video_utils import (
+    ensure_even_dimensions,
+    frame_to_png_bytes,
+)
 
 
 class TestParseInstructions:
@@ -316,7 +318,9 @@ def test_parse_instructions_default_base_dir(self):
 
             # This test verifies that when base_dir is provided explicitly, it uses that directory
             text = "Read @readme.md for information."
-            result = parse_instructions(text, base_dir=temp_dir)  # Use temp_dir as base_dir
+            result = parse_instructions(
+                text, base_dir=temp_dir
+            )  # Use temp_dir as base_dir
 
             assert result.input_text == text
             # Content will not be empty since readme.md exists in the temp directory
@@ -508,31 +512,31 @@ def test_pcm_data_resample_av_array_shape_fix(self):
 
 class TestEnsureEvenDimensions:
     """Test suite for ensure_even_dimensions function."""
-    
+
     def test_even_dimensions_unchanged(self):
         """Test that frames with even dimensions pass through unchanged."""
         # Create a frame with even dimensions (1920x1080)
         frame = av.VideoFrame(width=1920, height=1080, format="yuv420p")
-        
+
         result = ensure_even_dimensions(frame)
-        
+
         assert result.width == 1920
         assert result.height == 1080
-    
+
     def test_both_dimensions_odd_cropped(self):
         """Test that frames with both odd dimensions are cropped."""
         # Create a frame with both odd dimensions (1921x1081)
         frame = av.VideoFrame(width=1921, height=1081, format="yuv420p")
-        
+
         result = ensure_even_dimensions(frame)
-        
+
         assert result.width == 1920  # Cropped from 1921
         assert result.height == 1080  # Cropped from 1081
-    
+
     def test_timing_information_preserved(self):
         """Test that pts and time_base are preserved after cropping."""
         from fractions import Fraction
-        
+
         # Create a frame with timing information
         frame = av.VideoFrame(width=1921, height=1081, format="yuv420p")
         frame.pts = 12345
@@ -603,3 +607,17 @@ def test_configure_default_logging_level_already_set(self, make_logger):
         for logger in loggers:
             assert logger.level == logging.DEBUG
             assert logger.handlers
+
+
+async def test_frame_to_png_bytes_with_bunny_video(bunny_video_track):
+    """Check that a real bunny video frame encodes to valid PNG bytes"""
+    png_signature = b"\x89PNG\r\n\x1a\n"  # fixed leading bytes of a PNG stream
+
+    # Pull one frame from the bunny video track and encode it
+    video_frame = await bunny_video_track.recv()
+    encoded = frame_to_png_bytes(video_frame)
+
+    # The result must be non-empty bytes that begin with the PNG signature
+    assert isinstance(encoded, bytes)
+    assert len(encoded) > 0
+    assert encoded.startswith(png_signature)