Skip to content

Commit aa16c49

Browse files
committed
Tests cleanup and move video code to video_utils
1 parent d2006e4 commit aa16c49

File tree

5 files changed

+67
-71
lines changed

5 files changed

+67
-71
lines changed

agents-core/vision_agents/core/utils/utils.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
import asyncio
22
import importlib.metadata
3-
import io
43
import logging
54
import os
65
import re
76
from dataclasses import dataclass
87
from typing import Dict, Optional
98

109
import httpx
11-
from PIL import Image
1210

1311
logger = logging.getLogger(__name__)
1412

@@ -132,31 +130,6 @@ def parse_instructions(text: str, base_dir: Optional[str] = None) -> Instruction
132130
)
133131

134132

135-
def frame_to_png_bytes(frame) -> bytes:
136-
"""
137-
Convert a video frame to PNG bytes.
138-
139-
Args:
140-
frame: Video frame object that can be converted to an image
141-
142-
Returns:
143-
PNG bytes of the frame, or empty bytes if conversion fails
144-
"""
145-
try:
146-
if hasattr(frame, "to_image"):
147-
img = frame.to_image()
148-
else:
149-
arr = frame.to_ndarray(format="rgb24")
150-
img = Image.fromarray(arr)
151-
152-
buf = io.BytesIO()
153-
img.save(buf, format="PNG")
154-
return buf.getvalue()
155-
except Exception as e:
156-
logger.error(f"Error converting frame to PNG: {e}")
157-
return b""
158-
159-
160133
def get_vision_agents_version() -> Optional[str]:
161134
"""
162135
Get the installed vision-agents package version.

agents-core/vision_agents/core/utils/video_utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import av
66
from PIL.Image import Resampling
7+
from PIL import Image
78

89

910
def ensure_even_dimensions(frame: av.VideoFrame) -> av.VideoFrame:
@@ -61,3 +62,24 @@ def frame_to_jpeg_bytes(
6162
buf = io.BytesIO()
6263
resized.save(buf, "JPEG", quality=quality, optimize=True)
6364
return buf.getvalue()
65+
66+
67+
def frame_to_png_bytes(frame: av.VideoFrame) -> bytes:
68+
"""
69+
Convert a video frame to PNG bytes.
70+
71+
Args:
72+
frame: Video frame object that can be converted to an image
73+
74+
Returns:
75+
PNG bytes of the frame (conversion errors are not caught and propagate to the caller)
76+
"""
77+
if hasattr(frame, "to_image"):
78+
img = frame.to_image()
79+
else:
80+
arr = frame.to_ndarray(format="rgb24")
81+
img = Image.fromarray(arr)
82+
83+
buf = io.BytesIO()
84+
img.save(buf, format="PNG")
85+
return buf.getvalue()

plugins/gemini/tests/test_gemini_realtime.py

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,26 @@
55
from vision_agents.core.tts.manual_test import play_pcm_with_ffplay
66
from vision_agents.plugins.gemini import Realtime
77
from vision_agents.core.llm.events import RealtimeAudioOutputEvent
8-
from vision_agents.core.utils.utils import frame_to_png_bytes
98
from getstream.video.rtc import PcmData, AudioFormat
109

1110
# Load environment variables
1211
load_dotenv()
1312

1413

15-
class TestGeminiRealtime:
16-
"""Integration tests for Realtime2 connect flow"""
14+
@pytest.fixture
15+
async def realtime(self):
16+
"""Create and manage Realtime connection lifecycle"""
17+
realtime = Realtime(
18+
model="gemini-2.0-flash-exp",
19+
)
20+
try:
21+
yield realtime
22+
finally:
23+
await realtime.close()
1724

18-
@pytest.fixture
19-
async def realtime(self):
20-
"""Create and manage Realtime connection lifecycle"""
21-
realtime = Realtime(
22-
model="gemini-2.0-flash-exp",
23-
)
24-
try:
25-
yield realtime
26-
finally:
27-
await realtime.close()
25+
26+
class TestGeminiRealtime:
27+
"""Integration tests for Gemini Realtime connect flow"""
2828

2929
@pytest.mark.integration
3030
async def test_simple_response_flow(self, realtime):
@@ -93,20 +93,3 @@ async def on_audio(event: RealtimeAudioOutputEvent):
9393
# Stop video sender
9494
await realtime._stop_watching_video_track()
9595
assert len(events) > 0
96-
97-
async def test_frame_to_png_bytes_with_bunny_video(self, bunny_video_track):
98-
"""Test that frame_to_png_bytes works with real bunny video frames"""
99-
# Get a frame from the bunny video track
100-
frame = await bunny_video_track.recv()
101-
png_bytes = frame_to_png_bytes(frame)
102-
103-
# Verify we got PNG data
104-
assert isinstance(png_bytes, bytes)
105-
assert len(png_bytes) > 0
106-
107-
# Verify it's actually PNG data (PNG files start with specific bytes)
108-
assert png_bytes.startswith(b"\x89PNG\r\n\x1a\n")
109-
110-
print(
111-
f"Successfully converted bunny video frame to PNG: {len(png_bytes)} bytes"
112-
)

plugins/gemini/vision_agents/plugins/gemini/gemini_realtime.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from vision_agents.core.llm.llm import LLMResponseEvent
4040
from vision_agents.core.llm.llm_types import ToolSchema
4141
from vision_agents.core.processors import Processor
42-
from vision_agents.core.utils.utils import frame_to_png_bytes
42+
from vision_agents.core.utils.video_utils import frame_to_png_bytes
4343
from vision_agents.core.utils.video_forwarder import VideoForwarder
4444

4545
logger = logging.getLogger(__name__)

tests/test_utils.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,17 @@
22
import os
33
import tempfile
44

5-
import numpy as np
65
import av
6+
import numpy as np
77
import pytest
8-
9-
from vision_agents.core.utils.utils import parse_instructions, Instructions
108
from getstream.video.rtc.track_util import AudioFormat
11-
from vision_agents.core.utils.video_utils import ensure_even_dimensions
129
from vision_agents.core.edge.types import PcmData
1310
from vision_agents.core.utils.logging import configure_default_logging
11+
from vision_agents.core.utils.utils import Instructions, parse_instructions
12+
from vision_agents.core.utils.video_utils import (
13+
ensure_even_dimensions,
14+
frame_to_png_bytes,
15+
)
1416

1517

1618
class TestParseInstructions:
@@ -316,7 +318,9 @@ def test_parse_instructions_default_base_dir(self):
316318

317319
# This test verifies that when base_dir is provided explicitly, it uses that directory
318320
text = "Read @readme.md for information."
319-
result = parse_instructions(text, base_dir=temp_dir) # Use temp_dir as base_dir
321+
result = parse_instructions(
322+
text, base_dir=temp_dir
323+
) # Use temp_dir as base_dir
320324

321325
assert result.input_text == text
322326
# Content will not be empty since readme.md exists in the temp directory
@@ -508,31 +512,31 @@ def test_pcm_data_resample_av_array_shape_fix(self):
508512

509513
class TestEnsureEvenDimensions:
510514
"""Test suite for ensure_even_dimensions function."""
511-
515+
512516
def test_even_dimensions_unchanged(self):
513517
"""Test that frames with even dimensions pass through unchanged."""
514518
# Create a frame with even dimensions (1920x1080)
515519
frame = av.VideoFrame(width=1920, height=1080, format="yuv420p")
516-
520+
517521
result = ensure_even_dimensions(frame)
518-
522+
519523
assert result.width == 1920
520524
assert result.height == 1080
521-
525+
522526
def test_both_dimensions_odd_cropped(self):
523527
"""Test that frames with both odd dimensions are cropped."""
524528
# Create a frame with both odd dimensions (1921x1081)
525529
frame = av.VideoFrame(width=1921, height=1081, format="yuv420p")
526-
530+
527531
result = ensure_even_dimensions(frame)
528-
532+
529533
assert result.width == 1920 # Cropped from 1921
530534
assert result.height == 1080 # Cropped from 1081
531-
535+
532536
def test_timing_information_preserved(self):
533537
"""Test that pts and time_base are preserved after cropping."""
534538
from fractions import Fraction
535-
539+
536540
# Create a frame with timing information
537541
frame = av.VideoFrame(width=1921, height=1081, format="yuv420p")
538542
frame.pts = 12345
@@ -603,3 +607,17 @@ def test_configure_default_logging_level_already_set(self, make_logger):
603607
for logger in loggers:
604608
assert logger.level == logging.DEBUG
605609
assert logger.handlers
610+
611+
612+
async def test_frame_to_png_bytes_with_bunny_video(bunny_video_track):
613+
"""Test that frame_to_png_bytes works with real bunny video frames"""
614+
# Get a frame from the bunny video track
615+
frame = await bunny_video_track.recv()
616+
png_bytes = frame_to_png_bytes(frame)
617+
618+
# Verify we got PNG data
619+
assert isinstance(png_bytes, bytes)
620+
assert len(png_bytes) > 0
621+
622+
# Verify it's actually PNG data (PNG files start with specific bytes)
623+
assert png_bytes.startswith(b"\x89PNG\r\n\x1a\n")

0 commit comments

Comments
 (0)