Nash0x7E2
diff --git a/‎plugins/fish/README.md‎
Lines changed: 60 additions & 0 deletions b/‎plugins/fish/README.md‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎plugins/fish/example/README.md‎
Lines changed: 70 additions & 0 deletions b/‎plugins/fish/example/README.md‎
Lines changed: 70 additions & 0 deletions
diff --git a/‎plugins/fish/example/__init__.py‎ b/‎plugins/fish/example/__init__.py‎
diff --git a/‎plugins/fish/example/fish_tts_example.py‎
Lines changed: 55 additions & 0 deletions b/‎plugins/fish/example/fish_tts_example.py‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎plugins/fish/example/pyproject.toml‎
Lines changed: 21 additions & 0 deletions b/‎plugins/fish/example/pyproject.toml‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎plugins/fish/py.typed‎ b/‎plugins/fish/py.typed‎
diff --git a/‎plugins/fish/pyproject.toml‎
Lines changed: 41 additions & 0 deletions b/‎plugins/fish/pyproject.toml‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎plugins/fish/tests/__init__.py‎ b/‎plugins/fish/tests/__init__.py‎
diff --git a/‎plugins/fish/tests/test_tts.py‎
Lines changed: 98 additions & 0 deletions b/‎plugins/fish/tests/test_tts.py‎
Lines changed: 98 additions & 0 deletions
diff --git a/‎plugins/fish/vision_agents/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎plugins/fish/vision_agents/__init__.py‎
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,60 @@
+# Fish Audio Text-to-Speech Plugin
+
+A high-quality Text-to-Speech (TTS) plugin for Vision Agents that uses the Fish Audio API.
+
+## Installation
+
+```bash
+pip install vision-agents-plugins-fish
+```
+
+## Usage
+
+```python
+from vision_agents.plugins.fish import TTS
+from vision_agents.core.tts.events import TTSAudioEvent
+from getstream.video.rtc.audio_track import AudioStreamTrack
+
+# Initialize with API key from environment variable
+tts = TTS()
+
+# Or specify API key directly
+tts = TTS(api_key="your_fish_audio_api_key")
+
+# Create an audio track to output speech
+track = AudioStreamTrack(framerate=16000)
+tts.set_output_track(track)
+
+# Register event handlers
+@tts.events.subscribe
+async def on_audio(event: TTSAudioEvent):
+    print(f"Received audio chunk: {len(event.audio_data)} bytes")
+
+# Send text to be converted to speech
+await tts.send("Hello, this is a test of the Fish Audio text-to-speech plugin.")
+```
+
+## Configuration Options
+
+- `api_key`: Fish Audio API key (default: reads from FISH_AUDIO_API_KEY environment variable)
+- `reference_id`: Optional reference voice ID to use for synthesis
+- `base_url`: Optional custom API endpoint (default: uses Fish Audio's default endpoint)
+
+## Reference Audio
+
+Fish Audio supports using reference audio for voice cloning:
+
+```python
+from vision_agents.plugins.fish import TTS
+
+# Using a reference voice ID
+tts = TTS(reference_id="your_reference_voice_id")
+
+# Or pass reference audio dynamically when sending text
+# (See Fish Audio SDK documentation for advanced usage)
+```
+
+## Requirements
+
+- Python 3.10+
+- fish-audio-sdk>=2025.4.2
+
@@ -0,0 +1,70 @@
+# Fish Audio TTS Examples
+
+This directory contains examples demonstrating how to use the Fish Audio TTS plugin with Vision Agents.
+
+## Examples
+
+### 1. Simple TTS Example (`simple_tts_example.py`)
+Basic usage of Fish Audio TTS without a full agent setup. Perfect for testing or simple integrations.
+
+### 2. Full Agent Example (`fish_tts_example.py`)
+Complete agent setup with Fish Audio TTS, Deepgram STT, and real-time communication.
+
+## Setup
+
+1. Install dependencies:
+```bash
+cd plugins/fish/example
+uv sync
+```
+
+2. Create a `.env` file with your API keys:
+```bash
+# Required for Fish Audio TTS
+FISH_AUDIO_API_KEY=your_fish_audio_api_key
+
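+# Required for full agent example only:
+DEEPGRAM_API_KEY=your_deepgram_api_key
+STREAM_API_KEY=your_stream_api_key
+STREAM_API_SECRET=your_stream_api_secret
+# The full agent example also uses the Gemini LLM plugin; add the API key
+# that plugin expects (see the Gemini plugin documentation).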
+```
+
+## Running the Examples
+
+### Simple TTS Example
+```bash
+uv run simple_tts_example.py
+```
+
+### Full Agent Example
+```bash
+uv run fish_tts_example.py
+```
+
+## What it does
+
+The example creates an AI agent that:
+- Uses **Fish Audio** for high-quality text-to-speech synthesis
+- Uses **Deepgram** for speech-to-text transcription
+- Uses **Gemini** (`gemini-2.0-flash`) as the language model
+- Uses **GetStream** for real-time communication
+- Uses **Smart Turn** detection for natural conversation flow
+
+The agent will greet you using Fish Audio's TTS and be ready to have a conversation.
+
+## Customization
+
+You can customize the Fish Audio TTS settings:
+
+```python
+# Use a specific reference voice
+tts = fish.TTS(reference_id="your_reference_voice_id")
+
+# Or use a custom endpoint
+tts = fish.TTS(base_url="https://your-custom-endpoint.com")
+```
+
+## Additional Resources
+
+- [Fish Audio Documentation](https://docs.fish.audio)
+- [Vision Agents Documentation](https://visionagents.ai)
+
@@ -0,0 +1,55 @@
+import asyncio
+import logging
+from uuid import uuid4
+
+from dotenv import load_dotenv
+
+from vision_agents.core import User
+from vision_agents.core.agents import Agent
+from vision_agents.plugins import fish, getstream, deepgram, smart_turn, gemini
+
+# Load settings (API keys etc.) from a local .env file into the process environment.
+load_dotenv()
+
+# Configure root logging at INFO level.
+# NOTE(review): the format references %(call_id)s, which is not a standard LogRecord
+# attribute — presumably vision_agents installs a filter/record factory that supplies
+# it; confirm, otherwise records without it will trigger logging format errors.
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s [call_id=%(call_id)s] %(name)s: %(message)s")
+logger = logging.getLogger(__name__)
+
+
+async def start_agent() -> None:
+    """
+    Example demonstrating Fish Audio TTS integration with Vision Agents.
+    
+    This example creates an agent that uses:
+    - Fish Audio for text-to-speech (TTS)
+    - Deepgram for speech-to-text (STT)
+    - GetStream for edge/real-time communication
+    - Smart Turn for turn detection
+    
+    Requirements:
+    - FISH_AUDIO_API_KEY environment variable
+    - DEEPGRAM_API_KEY environment variable
+    - STREAM_API_KEY and STREAM_API_SECRET environment variables
+    """
+    agent = Agent(
+        edge=getstream.Edge(),
+        agent_user=User(name="Friendly AI"),
+        instructions="You are a helpful AI assistant. Be friendly and conversational.",
+        tts=fish.TTS(),  # Uses Fish Audio for text-to-speech
+        stt=deepgram.STT(),
+        llm=gemini.LLM("gemini-2.0-flash"),
+        turn_detection=smart_turn.TurnDetection(buffer_duration=2.0, confidence_threshold=0.5),
+    )
+    await agent.create_user()
+
+    call = agent.edge.client.video.call("default", str(uuid4()))
+    await agent.edge.open_demo(call)
+
+    with await agent.join(call):
+        await asyncio.sleep(5)
+        # The agent will greet the user using Fish Audio TTS
+        await agent.llm.simple_response(text="Hello! I'm using Fish Audio for text-to-speech. How can I help you today?")
+        await agent.finish()
+
+
+# Script entry point: run the async example to completion when executed directly.
+if __name__ == "__main__":
+    asyncio.run(start_agent())
+
@@ -0,0 +1,21 @@
+[project]
+name = "fish-tts-example"
+version = "0.0.0"
+requires-python = ">=3.10"
+
+# Every plugin imported by fish_tts_example.py must be listed here,
+# including the Gemini plugin that provides the LLM.
+dependencies = [
+  "python-dotenv>=1.0",
+  "vision-agents-plugins-fish",
+  "vision-agents-plugins-getstream",
+  "vision-agents-plugins-deepgram",
+  "vision-agents-plugins-smart-turn",
+  "vision-agents-plugins-gemini",
+  "vision-agents",
+]
+
+# Resolve the plugins and core from the local checkout (editable installs).
+[tool.uv.sources]
+"vision-agents-plugins-fish" = {path = "..", editable=true}
+"vision-agents-plugins-getstream" = {path = "../../getstream", editable=true}
+"vision-agents-plugins-deepgram" = {path = "../../deepgram", editable=true}
+"vision-agents-plugins-smart-turn" = {path = "../../smart_turn", editable=true}
+"vision-agents-plugins-gemini" = {path = "../../gemini", editable=true}
+"vision-agents" = {path = "../../../agents-core", editable=true}
+
@@ -0,0 +1,41 @@
+# Build with hatchling; hatch-vcs supplies the version from VCS metadata.
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+
+[project]
+name = "vision-agents-plugins-fish"
+# The version is not hard-coded; it is resolved by [tool.hatch.version] below.
+dynamic = ["version"]
+description = "Fish Audio TTS integration for Vision Agents"
+readme = "README.md"
+keywords = ["fish-audio", "TTS", "text-to-speech", "AI", "voice agents", "agents"]
+requires-python = ">=3.10"
+license = "MIT"
+dependencies = [
+    "vision-agents",
+    "fish-audio-sdk>=2025.4.2",
+]
+
+[project.urls]
+Documentation = "https://visionagents.ai/"
+Website = "https://visionagents.ai/"
+Source = "https://github.com/GetStream/Vision-Agents"
+
+# Version lookup starts one directory up and searches parent directories;
+# "0.0.0" is the fallback when no version can be determined.
+[tool.hatch.version]
+source = "vcs"
+raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
+
+[tool.hatch.build.targets.wheel]
+packages = [".", "vision_agents"]
+
+[tool.hatch.build.targets.sdist]
+include = ["/vision_agents"]
+
+# Resolve vision-agents from the uv workspace instead of a package index.
+[tool.uv.sources]
+vision-agents = { workspace = true }
+
+# Development-only dependencies used to run the test suite.
+[dependency-groups]
+dev = [
+    "pytest>=8.4.1",
+    "pytest-asyncio>=1.0.0",
+]
+
@@ -0,0 +1,98 @@
+import os
+import asyncio
+
+import pytest
+from dotenv import load_dotenv
+
+from vision_agents.plugins import fish
+from vision_agents.core.tts.events import TTSAudioEvent, TTSErrorEvent
+from getstream.video.rtc.audio_track import AudioStreamTrack
+
+# Load environment variables (e.g. FISH_AUDIO_API_KEY) from a local .env file
+load_dotenv()
+
+# Audio track for capturing test output
+class MockAudioTrack(AudioStreamTrack):
+    def __init__(self, framerate: int = 16000):
+        self.framerate = framerate
+        self.written_data = []
+
+    async def write(self, data: bytes):
+        self.written_data.append(data)
+        return True
+
+
+@pytest.mark.integration
+async def test_fish_tts_convert_text_to_audio():
+    """
+    Integration test with the real Fish Audio API.
+    
+    This test uses the actual Fish Audio API with the
+    FISH_AUDIO_API_KEY environment variable.
+    It will be skipped if the environment variable is not set.
+    
+    To set up the FISH_AUDIO_API_KEY:
+    1. Sign up for a Fish Audio account at https://fish.audio
+    2. Create an API key in your Fish Audio dashboard
+    3. Add to your .env file: FISH_AUDIO_API_KEY=your_api_key_here
+    """
+
+    
+    # Create a real Fish Audio TTS instance
+    tts = fish.TTS()
+    
+    # Create an audio track to capture the output
+    track = MockAudioTrack()
+    tts.set_output_track(track)
+    
+    # Track audio events
+    audio_received = asyncio.Event()
+    received_chunks = []
+    
+    @tts.events.subscribe
+    async def on_audio(event: TTSAudioEvent):
+        received_chunks.append(event.audio_data)
+        audio_received.set()
+    
+    # Track API errors
+    api_errors = []
+    
+    @tts.events.subscribe
+    async def on_error(event: TTSErrorEvent):
+        api_errors.append(event.error)
+        audio_received.set()  # Unblock the waiting
+    
+    # Allow event subscriptions to be processed
+    await asyncio.sleep(0.01)
+    
+    try:
+        # Use a short text to minimize API usage
+        text = "Hello from Fish Audio."
+        
+        # Send the text to generate speech
+        send_task = asyncio.create_task(tts.send(text))
+        
+        # Wait for either audio or an error
+        try:
+            await asyncio.wait_for(audio_received.wait(), timeout=15.0)
+        except asyncio.TimeoutError:
+            # Cancel the task if it's taking too long
+            send_task.cancel()
+            pytest.fail("No audio or error received within timeout")
+        
+        # Check if we received any API errors
+        if api_errors:
+            pytest.skip(f"API error received: {api_errors[0]}")
+        
+        # Try to ensure the send task completes
+        try:
+            await send_task
+        except Exception as e:
+            pytest.skip(f"Exception during TTS generation: {e}")
+        
+        # Verify that we received audio data
+        assert len(received_chunks) > 0, "No audio chunks were received"
+        assert len(track.written_data) > 0, "No audio data was written to track"
+    except Exception as e:
+        pytest.skip(f"Unexpected error in Fish Audio test: {e}")
+
@@ -0,0 +1,3 @@
+# Namespace package: extend __path__ via pkgutil so that other distributions
+# shipping a ``vision_agents`` directory on sys.path are merged into this package.
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
+
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Namespace package`
	`2`	`+__path__ = __import__("pkgutil").extend_path(__path__, __name__)`
	`3`	`+`