55
This example creates an agent that uses:
- Inworld AI for text-to-speech (TTS)
- Stream for edge/real-time communication
- Deepgram for speech-to-text (STT)
- Smart Turn for turn detection
1011
Requirements:
- INWORLD_API_KEY environment variable
- STREAM_API_KEY and STREAM_API_SECRET environment variables
- DEEPGRAM_API_KEY environment variable
1416"""
1517
1618import asyncio
2022
2123from vision_agents .core import User , Agent , cli
2224from vision_agents .core .agents import AgentLauncher
23- from vision_agents .plugins import inworld , getstream , smart_turn , gemini
25+ from vision_agents .plugins import inworld , getstream , smart_turn , gemini , deepgram
2426
2527
2628logger = logging .getLogger (__name__ )
@@ -34,9 +36,10 @@ async def create_agent(**kwargs) -> Agent:
3436 edge = getstream .Edge (),
3537 agent_user = User (name = "Friendly AI" , id = "agent" ),
3638 instructions = "You're a helpful voice AI assistant. Keep your responses concise and friendly." ,
37- tts = inworld .TTS (), # Uses Inworld AI for text-to-speech
39+ tts = inworld .TTS (),
40+ stt = deepgram .STT (),
3841 llm = gemini .LLM ("gemini-2.0-flash" ),
39- turn_detection = smart_turn .TurnDetection (buffer_in_seconds = 2.0 , confidence_threshold = 0.5 ),
42+ turn_detection = smart_turn .TurnDetection (),
4043 )
4144 return agent
4245
0 commit comments