Skip to content

Commit

Permalink
Merge pull request #341 from leng-yue/main
Browse files Browse the repository at this point in the history
Feat/Support Fish Audio
  • Loading branch information
tomasliu-agora authored Oct 21, 2024
2 parents 3ee9cda + 78c0bb2 commit 1c2c235
Show file tree
Hide file tree
Showing 9 changed files with 820 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ COSY_TTS_KEY=
# ElevenLabs TTS key
ELEVENLABS_TTS_KEY=

# Extension: fish_audio_tts
# Fish.audio TTS key
FISH_AUDIO_TTS_KEY=

# Extension: gemini_llm
# Gemini API key
GEMINI_API_KEY=
Expand Down
201 changes: 201 additions & 0 deletions agents/property.json
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,207 @@
}
]
},
{
"name": "va.openai.fish",
"auto_start": false,
"nodes": [
{
"type": "extension",
"extension_group": "default",
"addon": "agora_rtc",
"name": "agora_rtc",
"property": {
"app_id": "${env:AGORA_APP_ID}",
"token": "<agora_token>",
"channel": "astra_agents_test",
"stream_id": 1234,
"remote_stream_id": 123,
"subscribe_audio": true,
"publish_audio": true,
"publish_data": true,
"enable_agora_asr": true,
"agora_asr_vendor_name": "microsoft",
"agora_asr_language": "en-US",
"agora_asr_vendor_key": "${env:AZURE_STT_KEY}",
"agora_asr_vendor_region": "${env:AZURE_STT_REGION}",
"agora_asr_session_control_file_path": "session_control.conf"
}
},
{
"type": "extension",
"extension_group": "default",
"addon": "interrupt_detector",
"name": "interrupt_detector"
},
{
"type": "extension",
"extension_group": "chatgpt",
"addon": "openai_chatgpt",
"name": "openai_chatgpt",
"property": {
"base_url": "",
"api_key": "${env:OPENAI_API_KEY}",
"frequency_penalty": 0.9,
"model": "gpt-4o-mini",
"max_tokens": 512,
"prompt": "",
"proxy_url": "${env:OPENAI_PROXY_URL}",
"greeting": "TEN Agent connected. How can I help you today?",
"max_memory_length": 10
}
},
{
"type": "extension",
"extension_group": "tts",
"addon": "fish_audio_tts",
"name": "fish_audio_tts",
"property": {
"api_key": "${env:FISH_AUDIO_TTS_KEY}",
"model_id": "d8639b5cc95548f5afbcfe22d3ba5ce5",
"optimize_streaming_latency": true,
"request_timeout_seconds": 30,
"base_url": "https://api.fish.audio"
}
},
{
"type": "extension",
"extension_group": "transcriber",
"addon": "message_collector",
"name": "message_collector"
},
{
"type": "extension_group",
"addon": "default_extension_group",
"name": "default"
},
{
"type": "extension_group",
"addon": "default_extension_group",
"name": "chatgpt"
},
{
"type": "extension_group",
"addon": "default_extension_group",
"name": "tts"
},
{
"type": "extension_group",
"addon": "default_extension_group",
"name": "transcriber"
}
],
"connections": [
{
"extension_group": "default",
"extension": "agora_rtc",
"data": [
{
"name": "text_data",
"dest": [
{
"extension_group": "default",
"extension": "interrupt_detector"
},
{
"extension_group": "chatgpt",
"extension": "openai_chatgpt"
},
{
"extension_group": "transcriber",
"extension": "message_collector"
}
]
}
]
},
{
"extension_group": "chatgpt",
"extension": "openai_chatgpt",
"data": [
{
"name": "text_data",
"dest": [
{
"extension_group": "tts",
"extension": "fish_audio_tts"
},
{
"extension_group": "transcriber",
"extension": "message_collector"
}
]
}
],
"cmd": [
{
"name": "flush",
"dest": [
{
"extension_group": "tts",
"extension": "fish_audio_tts"
}
]
}
]
},
{
"extension_group": "tts",
"extension": "fish_audio_tts",
"audio_frame": [
{
"name": "pcm_frame",
"dest": [
{
"extension_group": "default",
"extension": "agora_rtc"
}
]
}
],
"cmd": [
{
"name": "flush",
"dest": [
{
"extension_group": "default",
"extension": "agora_rtc"
}
]
}
]
},
{
"extension_group": "transcriber",
"extension": "message_collector",
"data": [
{
"name": "data",
"dest": [
{
"extension_group": "default",
"extension": "agora_rtc"
}
]
}
]
},
{
"extension_group": "default",
"extension": "interrupt_detector",
"cmd": [
{
"name": "flush",
"dest": [
{
"extension_group": "chatgpt",
"extension": "openai_chatgpt"
}
]
}
]
}
]
},
{
"name": "va.bedrock.azure",
"auto_start": false,
Expand Down
128 changes: 128 additions & 0 deletions agents/ten_packages/extension/fish_audio_tts/fish_audio_tts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/**
*
* Agora Real Time Engagement
* Created by Hai Guo in 2024-08.
* Copyright (c) 2024 Agora IO. All rights reserved.
*
*/
// Package extension implements a text-to-speech (TTS) extension written in Go.
package extension

import (
"bytes"
"fmt"
"io"
"log/slog"
"net/http"
"time"

"github.com/vmihailenco/msgpack/v5"
)

// fishAudioTTS is a text-to-speech client that holds the HTTP client used to
// send synthesis requests together with the configuration those requests are
// built from.
type fishAudioTTS struct {
// client is the shared HTTP client; textToSpeechStream sends every request through it.
client *http.Client
// config supplies the API key, model id, latency mode and base URL for requests.
config fishAudioTTSConfig
}

// fishAudioTTSConfig groups the settings used to build the HTTP client and
// the text-to-speech requests.
type fishAudioTTSConfig struct {
// ApiKey is sent as the bearer token in the Authorization header.
ApiKey string
// ModelId is sent as the "reference_id" field of the request payload.
ModelId string
// OptimizeStreamingLatency selects the "balanced" latency mode when true
// and "normal" when false.
OptimizeStreamingLatency bool
// RequestTimeoutSeconds is the HTTP client timeout, expressed in seconds.
RequestTimeoutSeconds int
// BaseUrl is the API root; the "/v1/tts" path is appended to it.
BaseUrl string
}

// defaultFishAudioTTSConfig returns the baseline configuration: no API key,
// the stock model id, latency optimization enabled, a 30 second request
// timeout and the public Fish Audio API endpoint.
func defaultFishAudioTTSConfig() fishAudioTTSConfig {
	// ApiKey is intentionally left at its zero value (""); callers must supply it.
	var cfg fishAudioTTSConfig
	cfg.ModelId = "d8639b5cc95548f5afbcfe22d3ba5ce5"
	cfg.OptimizeStreamingLatency = true
	cfg.RequestTimeoutSeconds = 30
	cfg.BaseUrl = "https://api.fish.audio"
	return cfg
}

// newFishAudioTTS builds a fishAudioTTS from config. The returned client owns
// a dedicated HTTP transport that keeps up to 10 idle connections per host,
// and its overall request timeout is config.RequestTimeoutSeconds seconds.
// The returned error is always nil.
func newFishAudioTTS(config fishAudioTTSConfig) (*fishAudioTTS, error) {
	requestTimeout := time.Duration(config.RequestTimeoutSeconds) * time.Second

	transport := &http.Transport{
		MaxIdleConnsPerHost: 10,
		// A zero idle timeout means keep-alive connections never expire.
		IdleConnTimeout: 0,
	}

	tts := &fishAudioTTS{
		config: config,
		client: &http.Client{
			Transport: transport,
			Timeout:   requestTimeout,
		},
	}
	return tts, nil
}

// textToSpeechStream asks the TTS service to synthesize text and copies the
// returned audio bytes to streamWriter as they arrive.
//
// The request is a MessagePack-encoded POST to <BaseUrl>/v1/tts carrying the
// configured model id as "reference_id" and requesting the "pcm" format. The
// latency mode is "balanced" when OptimizeStreamingLatency is set and
// "normal" otherwise.
//
// It returns a non-nil error when the payload cannot be encoded, the request
// cannot be built or sent, the service answers with a status other than
// 200 OK, reading the response body fails, or streamWriter rejects a write.
// Failures are reported through the returned error; the function does not
// panic on them.
func (e *fishAudioTTS) textToSpeechStream(streamWriter io.Writer, text string) (err error) {
	latency := "normal"
	if e.config.OptimizeStreamingLatency {
		latency = "balanced"
	}

	// Create the payload
	payload := map[string]interface{}{
		"text":         text,
		"chunk_length": 100,
		"latency":      latency,
		"reference_id": e.config.ModelId,
		"format":       "pcm", // 44100/ 1ch/ 16bit
	}

	// Encode the payload to MessagePack
	body, err := msgpack.Marshal(payload)
	if err != nil {
		return fmt.Errorf("failed to encode request payload: %w", err)
	}

	// Create a new POST request
	req, err := http.NewRequest(http.MethodPost, e.config.BaseUrl+"/v1/tts", bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}

	// Set the headers
	req.Header.Add("Authorization", "Bearer "+e.config.ApiKey)
	req.Header.Set("Content-Type", "application/msgpack")

	// Send the request with the shared client so connections are reused.
	resp, err := e.client.Do(req)
	if err != nil {
		return fmt.Errorf("TextToSpeechStream failed, err: %w", err)
	}
	defer resp.Body.Close()

	// Check the response status code
	if resp.StatusCode != http.StatusOK {
		slog.Error("Unexpected response status", "status", resp.StatusCode)
		return fmt.Errorf("unexpected response status: %d", resp.StatusCode)
	}

	// Forward the returned PCM data to streamWriter in chunks of at most 4KB.
	buffer := make([]byte, 4096)
	for {
		n, readErr := resp.Body.Read(buffer)

		// A Read may return data together with an error (including io.EOF),
		// so the bytes are forwarded before the error is inspected.
		if n > 0 {
			if _, writeErr := streamWriter.Write(buffer[:n]); writeErr != nil {
				slog.Error("Failed to write to streamWriter", "error", writeErr)
				return fmt.Errorf("failed to write to streamWriter: %w", writeErr)
			}
		}

		if readErr == io.EOF {
			break // end of the stream
		}
		if readErr != nil {
			slog.Error("Failed to read from response body", "error", readErr)
			return fmt.Errorf("failed to read from response body: %w", readErr)
		}
		// n == 0 with a nil error is a legal "nothing happened" read, not
		// end of stream; keep reading until io.EOF.
	}

	return nil
}
Loading

0 comments on commit 1c2c235

Please sign in to comment.