Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 61 additions & 7 deletions plugins/aws/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# AWS Plugin for Vision Agents

AWS (Bedrock) LLM integration for Vision Agents framework with support for both standard and realtime interactions. Includes AWS Polly TTS.
AWS (Bedrock) LLM integration for the Vision Agents framework, with support for both standard and realtime interactions.

## Installation

Expand All @@ -20,17 +20,19 @@ agent = Agent(
agent_user=User(name="Friendly AI"),
instructions="Be nice to the user",
llm=aws.LLM(model="qwen.qwen3-32b-v1:0"),
tts=aws.TTS(), # using AWS Polly
tts=cartesia.TTS(),
stt=deepgram.STT(),
turn_detection=smart_turn.TurnDetection(buffer_duration=2.0, confidence_threshold=0.5),
)
```

The full example is available in `example/aws_qwen_example.py`.

Nova sonic audio realtime STS is also supported:
### Realtime Audio Usage

```python
Nova Sonic realtime speech-to-speech (STS) audio is also supported:

```python
agent = Agent(
edge=getstream.Edge(),
agent_user=User(name="Story Teller AI"),
Expand All @@ -39,8 +41,61 @@ agent = Agent(
)
```

### Polly TTS Usage
## Function Calling

### Standard LLM (aws.LLM)

The standard LLM implementation **fully supports** function calling. Register functions using the `@llm.register_function` decorator:

```python
from vision_agents.plugins import aws

llm = aws.LLM(
model="qwen.qwen3-32b-v1:0",
region_name="us-east-1"
)

@llm.register_function(
name="get_weather",
description="Get the current weather for a given city"
)
def get_weather(city: str) -> dict:
"""Get weather information for a city."""
return {
"city": city,
"temperature": 72,
"condition": "Sunny"
}
```

### Realtime (aws.Realtime)

The Realtime implementation **fully supports** function calling with AWS Nova Sonic. Register functions using the `@llm.register_function` decorator:

```python
from vision_agents.plugins import aws

llm = aws.Realtime(
model="amazon.nova-sonic-v1:0",
region_name="us-east-1"
)

@llm.register_function(
name="get_weather",
description="Get the current weather for a given city"
)
def get_weather(city: str) -> dict:
"""Get weather information for a city."""
return {
"city": city,
"temperature": 72,
"condition": "Sunny"
}

# The function will be automatically called when the model decides to use it
```

See `example/aws_realtime_function_calling_example.py` for a complete example.

## Running the examples

Expand All @@ -53,9 +108,8 @@ STREAM_API_SECRET=your_stream_api_secret_here
AWS_BEARER_TOKEN_BEDROCK=
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION=us-east-1

FAL_KEY=
CARTESIA_API_KEY=
DEEPGRAM_API_KEY=
```
```
89 changes: 89 additions & 0 deletions plugins/aws/example/aws_llm_function_calling_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import asyncio
import logging
from uuid import uuid4

from dotenv import load_dotenv

from vision_agents.core import User
from vision_agents.core.agents import Agent
from vision_agents.plugins import aws, getstream, cartesia, deepgram

# Load settings from a local .env file (python-dotenv) before anything reads the environment.
load_dotenv()

# NOTE(review): the format string references %(call_id)s, which is not a standard
# LogRecord attribute. Presumably the vision_agents framework injects it into every
# record — confirm; otherwise records without it will fail to format.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s")
# Module-level logger used by the tool functions below.
logger = logging.getLogger(__name__)


def _evaluate_arithmetic(expression: str) -> float:
    """Evaluate a plain arithmetic expression without using ``eval``.

    Only numeric literals, the binary operators ``+ - * / // % **`` and the
    unary ``+``/``-`` are accepted. Names, calls, attribute access and every
    other construct are rejected, so a string supplied by the model cannot
    execute arbitrary code.

    Args:
        expression: The arithmetic expression, e.g. ``"25 * 4"``.

    Returns:
        The numeric result (``int`` or ``float``).

    Raises:
        SyntaxError: If ``expression`` is not a valid Python expression.
        ValueError: If it uses an unsupported construct or an oversized exponent.
        ZeroDivisionError: On division or modulo by zero.
    """
    # Imported locally so this helper is self-contained within the example.
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Cap exponents so an input like "9 ** 9 ** 9" cannot stall the process.
    max_exponent = 1000

    def visit(node):
        if isinstance(node, ast.Expression):
            return visit(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int; reject it along with str, None, etc.
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return value
            raise ValueError(f"Unsupported literal: {value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = visit(node.left)
            right = visit(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > max_exponent:
                raise ValueError("Exponent too large")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](visit(node.operand))
        raise ValueError("Unsupported expression")

    return visit(ast.parse(expression, mode="eval"))


async def start_agent() -> None:
    """Run the AWS Bedrock LLM function-calling demo.

    Creates an agent backed by ``aws.LLM`` with Cartesia TTS and Deepgram STT,
    registers the ``get_weather`` and ``calculate`` tools on its LLM, joins a
    freshly created call and sends two prompts that ask the model to use those
    tools.
    """
    agent = Agent(
        edge=getstream.Edge(),
        agent_user=User(name="Weather Bot"),
        instructions="You are a helpful weather bot. Use the provided tools to answer questions.",
        llm=aws.LLM(
            model="anthropic.claude-3-sonnet-20240229-v1:0",
            region_name="us-east-1",
        ),
        tts=cartesia.TTS(),
        stt=deepgram.STT(),
        # turn_detection=smart_turn.TurnDetection(buffer_duration=2.0, confidence_threshold=0.5),
    )

    # Register custom functions
    @agent.llm.register_function(
        name="get_weather",
        description="Get the current weather for a given city",
    )
    def get_weather(city: str) -> dict:
        """Get weather information for a city."""
        logger.info("Tool: get_weather called for city: %s", city)
        # Mock data: only Boulder has a known reading.
        if city.lower() == "boulder":
            return {"city": city, "temperature": 72, "condition": "Sunny"}
        return {"city": city, "temperature": "unknown", "condition": "unknown"}

    @agent.llm.register_function(
        name="calculate",
        description="Performs a mathematical calculation",
    )
    def calculate(expression: str) -> dict:
        """Performs a mathematical calculation."""
        logger.info("Tool: calculate called with expression: %s", expression)
        try:
            # The expression is produced by the model, i.e. untrusted input, so it
            # is parsed by a restricted arithmetic evaluator instead of eval().
            result = _evaluate_arithmetic(expression)
        except Exception as e:
            # Tool boundary: always hand the model a structured error, never raise.
            return {"expression": expression, "error": str(e)}
        return {"expression": expression, "result": result}

    await agent.create_user()

    call = agent.edge.client.video.call("default", str(uuid4()))
    await agent.edge.open_demo(call)

    with await agent.join(call):
        # Give the agent a moment to connect
        await asyncio.sleep(5)

        # Test function calling with weather
        logger.info("Testing weather function...")
        await agent.llm.simple_response(
            text="What's the weather like in Boulder? Please use the get_weather function."
        )

        await asyncio.sleep(5)

        # Test function calling with calculation
        logger.info("Testing calculation function...")
        await agent.llm.simple_response(
            text="Can you calculate 25 multiplied by 4 using the calculate function?"
        )

        await asyncio.sleep(5)

        # Wait a bit before finishing
        await asyncio.sleep(5)
        await agent.finish()


if __name__ == "__main__":
    asyncio.run(start_agent())

128 changes: 128 additions & 0 deletions plugins/aws/example/aws_realtime_function_calling_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import asyncio
import logging
from uuid import uuid4

from dotenv import load_dotenv

from vision_agents.core import User
from vision_agents.core.agents import Agent
from vision_agents.plugins import aws, getstream

# Load settings from a local .env file (python-dotenv) before anything reads the environment.
load_dotenv()

# NOTE(review): the format string references %(call_id)s, which is not a standard
# LogRecord attribute. Presumably the vision_agents framework injects it into every
# record — confirm; otherwise records without it will fail to format.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s"
)
# Module-level logger for this example.
logger = logging.getLogger(__name__)


async def start_agent() -> None:
    """Run the AWS Bedrock realtime function-calling demo.

    Builds an agent around ``aws.Realtime``, registers the ``get_weather`` and
    ``calculate`` tools on its LLM, joins a freshly created call and sends a
    single weather prompt that asks the model to use ``get_weather``.
    """
    # Build the agent's parts in the order they are passed to Agent().
    edge = getstream.Edge()
    assistant_user = User(name="Weather Assistant AI")
    system_prompt = """You are a helpful weather assistant. When users ask about weather,
use the get_weather function to fetch current conditions. You can also help with
simple calculations using the calculate function."""
    realtime_llm = aws.Realtime(
        model="amazon.nova-sonic-v1:0",
        region_name="us-east-1",
    )
    agent = Agent(
        edge=edge,
        agent_user=assistant_user,
        instructions=system_prompt,
        llm=realtime_llm,
    )

    # Tools the model is allowed to call.
    @agent.llm.register_function(
        name="get_weather",
        description="Get the current weather for a given city",
    )
    def get_weather(city: str) -> dict:
        """Get weather information for a city.

        Args:
            city: The name of the city

        Returns:
            Weather information including temperature and conditions
        """
        # Canned readings stand in for a real weather API.
        known_cities = {
            "Boulder": {"temp": 72, "condition": "Sunny", "humidity": 30},
            "Seattle": {"temp": 58, "condition": "Rainy", "humidity": 85},
            "Miami": {"temp": 85, "condition": "Partly Cloudy", "humidity": 70},
        }
        fallback = {"temp": 70, "condition": "Unknown", "humidity": 50}

        reading = known_cities[city] if city in known_cities else fallback

        report = {"city": city}
        report["temperature"] = reading["temp"]
        report["condition"] = reading["condition"]
        report["humidity"] = reading["humidity"]
        report["unit"] = "Fahrenheit"
        return report

    @agent.llm.register_function(
        name="calculate",
        description="Perform a mathematical calculation",
    )
    def calculate(operation: str, a: float, b: float) -> dict:
        """Perform a calculation.

        Args:
            operation: The operation to perform (add, subtract, multiply, divide)
            a: First number
            b: Second number

        Returns:
            Result of the calculation
        """
        if operation == "add":
            outcome = a + b
        elif operation == "subtract":
            outcome = a - b
        elif operation == "multiply":
            outcome = a * b
        elif operation == "divide":
            # Report division by zero as a tool error instead of raising.
            if not (b != 0):
                return {"error": "Cannot divide by zero"}
            outcome = a / b
        else:
            return {"error": f"Unknown operation: {operation}"}

        return {"operation": operation, "a": a, "b": b, "result": outcome}

    # Create the agent's user, then open a brand-new call for the demo.
    await agent.create_user()

    call_id = str(uuid4())
    call = agent.edge.client.video.call("default", call_id)
    await agent.edge.open_demo(call)

    with await agent.join(call):
        # Short pause so the agent can finish connecting.
        await asyncio.sleep(5)

        prompt = "What's the weather like in Boulder? Please use the get_weather function."
        await agent.llm.simple_response(text=prompt)

        # Leave time for the model to handle the request and invoke the tool.
        await asyncio.sleep(15)

        await agent.finish()


if __name__ == "__main__":
    asyncio.run(start_agent())

12 changes: 6 additions & 6 deletions plugins/aws/example/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
[project]
name = "gemini-live-realtime-example"
name = "aws-bedrock-realtime-example"
version = "0.0.0"
requires-python = ">=3.10"
requires-python = ">=3.12"

# put only what this example needs
dependencies = [
"python-dotenv>=1.0",
"vision-agents-plugins-gemini",
"vision-agents-plugins-aws",
"vision-agents-plugins-getstream",
"vision-agents",
"google-genai>=1.33.0",
"boto3>=1.26.0",
"opentelemetry-exporter-otlp>=1.37.0",
"opentelemetry-exporter-prometheus>=0.58b0",
"prometheus-client>=0.23.1",
"opentelemetry-sdk>=1.37.0",
]

[tool.uv.sources]
"vision-agents-plugins-getstream" = {path = "../../../plugins/getstream", editable=true}
"vision-agents-plugins-gemini" = {path = "../../../plugins/gemini", editable=true}
"vision-agents-plugins-getstream" = {path = "../../getstream", editable=true}
"vision-agents-plugins-aws" = {path = "..", editable=true}
"vision-agents" = {path = "../../../agents-core", editable=true}
Loading
Loading