Commit e12112d

wip
1 parent d2853cd commit e12112d

File tree: 8 files changed (+815, -12 lines)

agents-core/vision_agents/core/llm/llm.py

Lines changed: 2 additions & 1 deletion
@@ -23,9 +23,10 @@
 
 
 class LLMResponseEvent(Generic[T]):
-    def __init__(self, original: T, text: str):
+    def __init__(self, original: T, text: str, exception: Optional[Exception] = None):
         self.original = original
         self.text = text
+        self.exception = exception
 
 
 BeforeCb = Callable[[List[Any]], None]
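
With this change, `LLMResponseEvent` can carry an `exception` alongside `text`, which is what the new `assert_response_successful` helper in the Bedrock tests checks for. A minimal caller-side sketch, assuming `simple_response` returns the event with `exception` set instead of raising (the `ask` helper and the logging are illustrative, not part of this commit):

```python
import logging
from typing import Optional

from vision_agents.plugins import bedrock

logger = logging.getLogger(__name__)


async def ask(llm: bedrock.LLM, prompt: str) -> Optional[str]:
    # Illustrative sketch: with this commit a failed call can surface as
    # LLMResponseEvent.exception rather than (or in addition to) a raise.
    response = await llm.simple_response(prompt)
    if response.exception is not None:
        logger.error("Bedrock call failed: %s", response.exception)
        return None
    return response.text
```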

conftest.py

Lines changed: 11 additions & 0 deletions
@@ -77,6 +77,17 @@ def mia_audio_16khz():
     return pcm
 
 
+@pytest.fixture
+def golf_swing_image():
+    """Load golf_swing.png image and return as bytes."""
+    image_file_path = os.path.join(get_assets_dir(), "golf_swing.png")
+
+    with open(image_file_path, "rb") as f:
+        image_bytes = f.read()
+
+    return image_bytes
+
+
 @pytest.fixture
 async def bunny_video_track():
     """Create RealVideoTrack from video file."""

plugins/bedrock/README.md

Lines changed: 43 additions & 1 deletion
@@ -1,6 +1,6 @@
 # AWS Bedrock Plugin for Vision Agents
 
-AWS Bedrock LLM integration for Vision Agents framework.
+AWS Bedrock LLM integration for the Vision Agents framework, with support for both standard and realtime interactions.
 
 ## Installation
 
@@ -10,6 +10,8 @@ pip install vision-agents-plugins-bedrock
 
 ## Usage
 
+### Standard LLM Usage
+
 ```python
 from vision_agents.plugins import bedrock
 
@@ -24,6 +26,35 @@ response = await llm.simple_response("Hello, how are you?")
 print(response.text)
 ```
 
+### Realtime Audio/Video Usage
+
+```python
+from vision_agents.plugins import bedrock
+
+# Initialize Bedrock Realtime with Nova Sonic for speech-to-speech
+realtime = bedrock.Realtime(
+    model="us.amazon.nova-sonic-v1:0",
+    region_name="us-east-1",
+    sample_rate=16000
+)
+
+# Connect to the session
+await realtime.connect()
+
+# Send text message
+await realtime.simple_response("Describe what you see")
+
+# Send audio
+pcm_data = PcmData(...)  # Your audio data
+await realtime.simple_audio_response(pcm_data)
+
+# Watch video track
+await realtime._watch_video_track(video_track)
+
+# Close when done
+await realtime.close()
+```
+
 ## Configuration
 
 The plugin uses boto3 for AWS authentication. You can configure credentials using:
@@ -33,11 +64,22 @@ The plugin uses boto3 for AWS authentication. You can configure credentials using:
 
 ## Supported Models
 
+### Standard Models (LLM class)
 All AWS Bedrock models are supported, including:
 - Claude 3.5 models (anthropic.claude-*)
 - Amazon Titan models (amazon.titan-*)
 - Meta Llama models (meta.llama-*)
 - And more
 
+### Realtime Models (Realtime class)
+Realtime audio/video models optimized for speech-to-speech:
+- **Amazon Nova Sonic (us.amazon.nova-sonic-v1:0)** - Primary model for realtime interactions with ultra-low latency
+- Amazon Nova Lite (us.amazon.nova-lite-v1:0)
+- Amazon Nova Micro (us.amazon.nova-micro-v1:0)
+- Amazon Nova Pro (us.amazon.nova-pro-v1:0)
+- And other Nova models
+
+**Note:** Nova Sonic is specifically designed for realtime speech-to-speech conversations and is the recommended default for the Realtime class.
+
 See [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/) for available models.
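
The credential options referenced under "## Configuration" fall outside the diff hunks above, so they are not shown here. For orientation, a minimal sketch of one common setup, assuming boto3's standard environment-variable credential chain (the variable values and the Claude model ID are placeholders, not taken from this commit):

```python
# Sketch: boto3's standard environment-variable credential chain.
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_DEFAULT_REGION are the usual
# boto3 variables; the values and the model ID below are placeholders.
import os

from vision_agents.plugins import bedrock

os.environ.setdefault("AWS_ACCESS_KEY_ID", "<your-access-key-id>")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "<your-secret-access-key>")
os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-1")

llm = bedrock.LLM(
    model="anthropic.claude-3-5-sonnet-20241022-v2:0",
    region_name="us-east-1",
)
```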

plugins/bedrock/tests/test_bedrock.py

Lines changed: 82 additions & 5 deletions
@@ -1,25 +1,52 @@
 """Tests for AWS Bedrock plugin."""
+import os
+
 import pytest
 from dotenv import load_dotenv
 
-from plugins.bedrock.vision_agents.plugins.bedrock.bedrock_llm import BedrockLLM
 from vision_agents.core.agents.conversation import InMemoryConversation
 from vision_agents.core.agents.conversation import Message
 from vision_agents.core.llm.events import LLMResponseChunkEvent
+from vision_agents.core.utils.utils import Instructions
+from vision_agents.plugins.bedrock.bedrock_llm import BedrockLLM
 
 load_dotenv()
 
+"""
+TODO:
+- Cleanup how we do llm.parsed_instructions
+- Remove duplication between streaming and non streaming
+"""
+
 
 class TestBedrockLLM:
     """Test suite for BedrockLLM class with real API calls."""
 
+    def assert_response_successful(self, response):
+        """
+        Utility method to verify a response is successful.
+
+        A successful response has:
+        - response.text is set (not None and not empty)
+        - response.exception is None
+
+        Args:
+            response: LLMResponseEvent to check
+        """
+        assert response.text is not None, "Response text should not be None"
+        assert len(response.text) > 0, "Response text should not be empty"
+        assert not hasattr(response, 'exception') or response.exception is None, f"Response should not have an exception, got: {getattr(response, 'exception', None)}"
+
     @pytest.fixture
     async def llm(self) -> BedrockLLM:
         """Test BedrockLLM initialization with a provided client."""
         llm = BedrockLLM(
-            model="anthropic.claude-3-5-sonnet-20241022-v2:0",
+            model="qwen.qwen3-32b-v1:0",
            region_name="us-east-1"
         )
+        if not os.environ.get("AWS_BEARER_TOKEN_BEDROCK"):
+            raise Exception("Please set AWS_BEARER_TOKEN_BEDROCK")
+
         llm._conversation = InMemoryConversation("be friendly", [])
         return llm
 
@@ -45,16 +72,15 @@ async def test_simple(self, llm: BedrockLLM):
         response = await llm.simple_response(
             "Explain quantum computing in 1 paragraph",
         )
-        assert response.text
+        self.assert_response_successful(response)
 
     @pytest.mark.integration
     async def test_native_api(self, llm: BedrockLLM):
         response = await llm.converse(
             messages=[{"role": "user", "content": [{"text": "say hi"}]}],
         )
 
-        # Assertions
-        assert response.text
+        self.assert_response_successful(response)
 
     @pytest.mark.integration
     async def test_stream(self, llm: BedrockLLM):
@@ -96,3 +122,54 @@ async def test_native_memory(self, llm: BedrockLLM):
         )
         assert "8" in response.text or "eight" in response.text
 
+    @pytest.mark.integration
+    async def test_image_description(self, golf_swing_image):
+        # Use a vision-capable model (Claude 3 Haiku supports images and is widely available)
+        vision_llm = BedrockLLM(
+            model="anthropic.claude-3-haiku-20240307-v1:0",
+            region_name="us-east-1"
+        )
+
+        image_bytes = golf_swing_image
+        response = await vision_llm.converse(
+            messages=[{
+                "role": "user",
+                "content": [
+                    {
+                        "image": {
+                            "format": "png",
+                            "source": {
+                                "bytes": image_bytes
+                            }
+                        }
+                    },
+                    {
+                        "text": "What sport do you see in this image?"
+                    }
+                ]
+            }]
+        )
+
+        self.assert_response_successful(response)
+        assert "golf" in response.text.lower()
+
+    @pytest.mark.integration
+    async def test_instruction_following(self, llm: BedrockLLM):
+        llm = BedrockLLM(
+            model="qwen.qwen3-32b-v1:0",
+            region_name="us-east-1",
+        )
+        llm.parsed_instructions = Instructions(
+            input_text="only reply in 2 letter country shortcuts",
+            markdown_contents={}
+        )
+
+        response = await llm.simple_response(
+            text="Which country is rainy, protected from water with dikes and below sea level?",
+        )
+
+        self.assert_response_successful(response)
+        assert "nl" in response.text.lower()
+
+

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+"""Tests for AWS Bedrock Realtime plugin."""
+import asyncio
+import pytest
+from dotenv import load_dotenv
+
+from vision_agents.plugins.bedrock import Realtime
+from vision_agents.core.llm.events import RealtimeAudioOutputEvent
+
+# Load environment variables
+load_dotenv()
+
+
+class TestBedrockRealtime:
+    """Integration tests for Bedrock Realtime connect flow"""
+
+    @pytest.fixture
+    async def realtime(self):
+        """Create and manage Realtime connection lifecycle"""
+        realtime = Realtime(
+            model="us.amazon.nova-sonic-v1:0",
+            region_name="us-east-1",
+        )
+        try:
+            yield realtime
+        finally:
+            await realtime.close()
+
+    @pytest.mark.integration
+    async def test_simple_response_flow(self, realtime):
+        """Test sending a simple text message and receiving response"""
+        # Send a simple message
+        events = []
+
+        @realtime.events.subscribe
+        async def on_audio(event: RealtimeAudioOutputEvent):
+            events.append(event)
+
+        await asyncio.sleep(0.01)
+        await realtime.connect()
+        await realtime.simple_response("Hello, can you hear me? Please respond with a short greeting.")
+
+        # Wait for response
+        await asyncio.sleep(5.0)
+
+        # Note: Depending on model capabilities, audio events may or may not be generated
+        # The test passes if no exceptions are raised
+        assert True
+
+    @pytest.mark.integration
+    async def test_audio_sending_flow(self, realtime, mia_audio_16khz):
+        """Test sending real audio data and verify connection remains stable"""
+        events = []
+
+        @realtime.events.subscribe
+        async def on_audio(event: RealtimeAudioOutputEvent):
+            events.append(event)
+
+        await asyncio.sleep(0.01)
+        await realtime.connect()
+
+        await realtime.simple_response("Listen to the following story, what is Mia looking for?")
+        await asyncio.sleep(10.0)
+        await realtime.simple_audio_response(mia_audio_16khz)
+
+        # Wait a moment to ensure processing
+        await asyncio.sleep(10.0)
+
+        # Test passes if no exceptions are raised
+        assert True
+
+    @pytest.mark.integration
+    async def test_video_sending_flow(self, realtime, bunny_video_track):
+        """Test sending real video data and verify connection remains stable"""
+        events = []
+
+        @realtime.events.subscribe
+        async def on_audio(event: RealtimeAudioOutputEvent):
+            events.append(event)
+
+        await asyncio.sleep(0.01)
+        await realtime.connect()
+        await realtime.simple_response("Describe what you see in this video please")
+        await asyncio.sleep(5.0)
+
+        # Start video sender with low FPS to avoid overwhelming the connection
+        await realtime._watch_video_track(bunny_video_track)
+
+        # Let it run for a few seconds
+        await asyncio.sleep(10.0)
+
+        # Stop video sender
+        await realtime._stop_watching_video_track()
+
+        # Test passes if no exceptions are raised
+        assert True
+
+    @pytest.mark.integration
+    async def test_connection_lifecycle(self, realtime):
+        """Test that connection can be established and closed properly"""
+        # Connect
+        await realtime.connect()
+        assert realtime._connected is True
+
+        # Send a simple message
+        await realtime.simple_response("Test message")
+        await asyncio.sleep(2.0)
+
+        # Close
+        await realtime.close()
+        assert realtime._connected is False
+

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,5 @@
 from .bedrock_llm import BedrockLLM as LLM
+from .bedrock_realtime import Realtime
 
-__all__ = ["LLM"]
+__all__ = ["LLM", "Realtime"]
 
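
With the new export, both classes can be imported from the plugin package, matching the imports used in the tests above. A minimal sketch (model IDs taken from the tests in this commit; otherwise illustrative):

```python
# LLM is the alias for BedrockLLM; Realtime is the export added by this commit.
from vision_agents.plugins.bedrock import LLM, Realtime

llm = LLM(model="qwen.qwen3-32b-v1:0", region_name="us-east-1")
realtime = Realtime(model="us.amazon.nova-sonic-v1:0", region_name="us-east-1")
```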
