diff --git a/.env.example b/.env.example index 70a544ae..4713a75f 100644 --- a/.env.example +++ b/.env.example @@ -63,6 +63,10 @@ COSY_TTS_KEY= # ElevenLabs TTS key ELEVENLABS_TTS_KEY= +# Extension: fish_audio_tts +# Fish.audio TTS key +FISH_AUDIO_TTS_KEY= + # Extension: gemini_llm # Gemini API key GEMINI_API_KEY= diff --git a/agents/property.json b/agents/property.json index 12c36d29..40bc5d04 100644 --- a/agents/property.json +++ b/agents/property.json @@ -624,6 +624,207 @@ } ] }, + { + "name": "va.openai.fish", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "${env:AGORA_APP_ID}", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "${env:AZURE_STT_KEY}", + "agora_asr_vendor_region": "${env:AZURE_STT_REGION}", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector", + "name": "interrupt_detector" + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "${env:OPENAI_API_KEY}", + "frequency_penalty": 0.9, + "model": "gpt-4o-mini", + "max_tokens": 512, + "prompt": "", + "proxy_url": "${env:OPENAI_PROXY_URL}", + "greeting": "TEN Agent connected. 
How can I help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "fish_audio_tts", + "name": "fish_audio_tts", + "property": { + "api_key": "${env:FISH_AUDIO_TTS_KEY}", + "model_id": "d8639b5cc95548f5afbcfe22d3ba5ce5", + "optimize_streaming_latency": true, + "request_timeout_seconds": 30, + "base_url": "https://api.fish.audio" + } + }, + { + "type": "extension", + "extension_group": "transcriber", + "addon": "message_collector", + "name": "message_collector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "transcriber" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "fish_audio_tts" + }, + { + "extension_group": "transcriber", + "extension": "message_collector" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "fish_audio_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "fish_audio_tts", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + 
"extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "transcriber", + "extension": "message_collector", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + }, { "name": "va.bedrock.azure", "auto_start": false, diff --git a/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts.go b/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts.go new file mode 100644 index 00000000..6ef40e08 --- /dev/null +++ b/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts.go @@ -0,0 +1,128 @@ +/** + * + * Agora Real Time Engagement + * Created by Hai Guo in 2024-08. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +// An extension written by Go for TTS +package extension + +import ( + "bytes" + "fmt" + "io" + "log/slog" + "net/http" + "time" + + "github.com/vmihailenco/msgpack/v5" +) + +type fishAudioTTS struct { + client *http.Client //? 
+	config fishAudioTTSConfig
+}
+
+// fishAudioTTSConfig holds the settings used to call the Fish Audio TTS HTTP API.
+type fishAudioTTSConfig struct {
+	ApiKey                   string // sent as the Bearer token of every request
+	ModelId                  string // sent as "reference_id" in the request payload
+	OptimizeStreamingLatency bool   // selects "balanced" latency instead of "normal"
+	RequestTimeoutSeconds    int    // timeout of the shared HTTP client, in seconds
+	BaseUrl                  string // API root; "/v1/tts" is appended to it
+}
+
+// defaultFishAudioTTSConfig returns the configuration used for every property
+// that is not overridden. ApiKey is empty and must be supplied by the caller.
+func defaultFishAudioTTSConfig() fishAudioTTSConfig {
+	return fishAudioTTSConfig{
+		ApiKey:                   "",
+		ModelId:                  "d8639b5cc95548f5afbcfe22d3ba5ce5",
+		OptimizeStreamingLatency: true,
+		RequestTimeoutSeconds:    30,
+		BaseUrl:                  "https://api.fish.audio",
+	}
+}
+
+// newFishAudioTTS builds a client around config with one reusable http.Client.
+// The returned error is currently always nil.
+func newFishAudioTTS(config fishAudioTTSConfig) (*fishAudioTTS, error) {
+	return &fishAudioTTS{
+		config: config,
+		client: &http.Client{
+			Transport: &http.Transport{
+				MaxIdleConnsPerHost: 10,
+				// Keep-Alive connection never expires
+				IdleConnTimeout: time.Second * 0,
+			},
+			Timeout: time.Second * time.Duration(config.RequestTimeoutSeconds),
+		},
+	}, nil
+}
+
+// textToSpeechStream posts text to "<BaseUrl>/v1/tts" as a MessagePack payload
+// and copies the PCM response body into streamWriter as it arrives.
+//
+// Every failure (payload encoding, request construction, transport error,
+// non-200 status, read or write error while streaming) is returned as an
+// error. The function never panics, so a transient network problem cannot
+// take down the process hosting the extension.
+func (e *fishAudioTTS) textToSpeechStream(streamWriter io.Writer, text string) (err error) {
+	latency := "normal"
+	if e.config.OptimizeStreamingLatency {
+		latency = "balanced"
+	}
+
+	// Create the payload
+	payload := map[string]interface{}{
+		"text":         text,
+		"chunk_length": 100,
+		"latency":      latency,
+		"reference_id": e.config.ModelId,
+		"format":       "pcm", // 44100/ 1ch/ 16bit
+	}
+
+	// Encode the payload to MessagePack
+	body, err := msgpack.Marshal(payload)
+	if err != nil {
+		return fmt.Errorf("encode tts payload: %w", err)
+	}
+
+	// Create a new POST request
+	req, err := http.NewRequest(http.MethodPost, e.config.BaseUrl+"/v1/tts", bytes.NewReader(body))
+	if err != nil {
+		return fmt.Errorf("create tts request: %w", err)
+	}
+
+	// Set the headers
+	req.Header.Set("Authorization", "Bearer "+e.config.ApiKey)
+	req.Header.Set("Content-Type", "application/msgpack")
+
+	// Send the request with the shared client so connections are reused
+	resp, err := e.client.Do(req)
+	if err != nil {
+		return fmt.Errorf("TextToSpeechStream failed, err: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Check the response status code
+	if resp.StatusCode != http.StatusOK {
+		slog.Error("Unexpected response status", "status", resp.StatusCode)
+		return fmt.Errorf("unexpected response status: %d", resp.StatusCode)
+	}
+
+	// Stream the returned PCM data to streamWriter. io.Copy stops on EOF and
+	// reports any other read or write error.
+	if _, err = io.Copy(streamWriter, resp.Body); err != nil {
+		slog.Error("Failed to stream response body", "error", err)
+		return fmt.Errorf("stream tts audio: %w", err)
+	}
+
+	return nil
+}
diff --git a/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts_extension.go b/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts_extension.go
new file mode 100644
index 00000000..3b017e17
--- /dev/null
+++ b/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts_extension.go
@@ -0,0 +1,308 @@
+/**
+ *
+ * Agora Real Time Engagement
+ * Created by Hai Guo in 2024-08.
+ * Copyright (c) 2024 Agora IO. All rights reserved.
+ *
+ */
+// An extension written by Go for TTS
+package extension
+
+import (
+	"fmt"
+	"io"
+	"log/slog"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"ten_framework/ten"
+)
+
+// Names of the commands, data properties and extension properties that this
+// extension exchanges with the ten graph.
+const (
+	cmdInFlush                 = "flush"
+	cmdOutFlush                = "flush"
+	dataInTextDataPropertyText = "text"
+
+	propertyApiKey                   = "api_key"                    // Required
+	propertyModelId                  = "model_id"                   // Optional
+	propertyOptimizeStreamingLatency = "optimize_streaming_latency" // Optional
+	propertyRequestTimeoutSeconds    = "request_timeout_seconds"    // Optional
+	propertyBaseUrl                  = "base_url"                   // Optional
+)
+
+const (
+	// textChanMax is the buffer capacity of textChan.
+	textChanMax = 1024
+)
+
+// Package-level state used by OnStart, OnCmd and OnData.
+// NOTE(review): these are not fields of fishAudioTTSExtension, so they would be
+// shared by every instance created in the same process - confirm only one
+// instance is expected.
+var (
+	// logTag is the attribute attached to every log record of this extension.
+	logTag = slog.String("extension", "FISH_AUDIO_TTS_EXTENSION")
+
+	// outdateTs is the time (Unix microseconds) of the latest flush command;
+	// messages received before it are skipped.
+	outdateTs atomic.Int64
+	// textChan queues the texts waiting to be synthesized; it is created in OnStart.
+	textChan chan *message
+	// wg counts the running synthesis goroutines.
+	wg sync.WaitGroup
+)
+
+// fishAudioTTSExtension is the ten extension that turns incoming text into PCM
+// audio frames through fishAudioTTS.
+type fishAudioTTSExtension struct {
+	ten.DefaultExtension
+	fishAudioTTS *fishAudioTTS
+}
+
+// message is one queued text together with the time it was received.
+type message struct {
+	text       string
+	receivedTs int64 // Unix microseconds, compared against outdateTs
+}
+
+// newFishAudioTTSExtension creates an empty extension; name is not used.
+func newFishAudioTTSExtension(name string) ten.Extension {
+	return &fishAudioTTSExtension{}
+}
+
+// OnStart will be called when the extension is starting,
+// properties can be read here to initialize and start the extension.
+// current supported properties:
+//   - api_key (required)
+//   - model_id
+//   - optimize_streaming_latency
+//   - request_timeout_seconds
+//   - base_url
+//
+// A missing optional property only produces a warning and keeps its default.
+// After the client is created, a single goroutine drains textChan and
+// synthesizes the queued messages one at a time, in order.
+//
+// NOTE(review): the two error paths return without calling OnStartDone;
+// confirm that this is what the runtime expects for a failed start.
+func (e *fishAudioTTSExtension) OnStart(ten ten.TenEnv) {
+	slog.Info("OnStart", logTag)
+
+	// prepare configuration
+	config := defaultFishAudioTTSConfig()
+
+	apiKey, err := ten.GetPropertyString(propertyApiKey)
+	if err != nil {
+		slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag)
+		return
+	}
+	config.ApiKey = apiKey
+
+	if modelId, err := ten.GetPropertyString(propertyModelId); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModelId, err), logTag)
+	} else if len(modelId) > 0 {
+		config.ModelId = modelId
+	}
+
+	if optimizeStreamingLatency, err := ten.GetPropertyBool(propertyOptimizeStreamingLatency); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyOptimizeStreamingLatency, err), logTag)
+	} else {
+		config.OptimizeStreamingLatency = optimizeStreamingLatency
+	}
+
+	if requestTimeoutSeconds, err := ten.GetPropertyInt64(propertyRequestTimeoutSeconds); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err), logTag)
+	} else if requestTimeoutSeconds > 0 {
+		config.RequestTimeoutSeconds = int(requestTimeoutSeconds)
+	}
+
+	if baseUrl, err := ten.GetPropertyString(propertyBaseUrl); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyBaseUrl, err), logTag)
+	} else if len(baseUrl) > 0 {
+		config.BaseUrl = baseUrl
+	}
+
+	// create fishAudioTTS instance
+	tts, err := newFishAudioTTS(config)
+	if err != nil {
+		slog.Error(fmt.Sprintf("newFishAudioTTS failed, err: %v", err), logTag)
+		return
+	}
+
+	slog.Info(fmt.Sprintf("newFishAudioTTS succeed with ModelId: %s",
+		config.ModelId), logTag)
+
+	// set fishAudio instance
+	e.fishAudioTTS = tts
+
+	// create pcm instance
+	pcmSender := newPcm(defaultPcmConfig())
+
+	// init chan
+	textChan = make(chan *message, textChanMax)
+
+	go func() {
+		slog.Info("process textChan", logTag)
+
+		for msg := range textChan {
+			if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt
+				slog.Info(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d",
+					msg.text, msg.receivedTs, outdateTs.Load()), logTag)
+				continue
+			}
+
+			e.synthesizeAndSend(ten, pcmSender, msg)
+		}
+	}()
+
+	ten.OnStartDone()
+}
+
+// synthesizeAndSend converts one message to speech and forwards the audio to
+// tenEnv in frames of p.getPcmFrameSize() bytes. It returns when the stream
+// ends, when reading fails, or when a flush newer than the message arrives.
+//
+// The synthesis goroutine receives the text by value and keeps its own error
+// variable, so it shares no mutable state with the read loop below.
+func (e *fishAudioTTSExtension) synthesizeAndSend(tenEnv ten.TenEnv, p *pcm, msg *message) {
+	wg.Add(1)
+	slog.Info(fmt.Sprintf("textChan text: [%s]", msg.text), logTag)
+
+	pcmFrameSize := p.getPcmFrameSize()
+	r, w := io.Pipe()
+	// Closing the read side makes any pending or later write in the synthesis
+	// goroutine fail, which stops the download after an interrupt.
+	defer r.Close()
+	startTime := time.Now()
+
+	go func(text string) {
+		defer wg.Done()
+		defer w.Close()
+
+		slog.Info(fmt.Sprintf("textToSpeechStream text: [%s]", text), logTag)
+		ttsErr := e.fishAudioTTS.textToSpeechStream(w, text)
+		slog.Info(fmt.Sprintf("textToSpeechStream result: [%v]", ttsErr), logTag)
+		if ttsErr != nil {
+			slog.Error(fmt.Sprintf("textToSpeechStream failed, err: %v", ttsErr), logTag)
+		}
+	}(msg.text)
+
+	slog.Info(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize), logTag)
+
+	var (
+		firstFrameLatency int64
+		pcmFrameRead      int
+		readBytes         int
+		sentFrames        int
+		interrupted       bool
+	)
+	buf := p.newBuf()
+
+	// read pcm stream
+	for {
+		if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt
+			slog.Info(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d",
+				msg.text, msg.receivedTs, outdateTs.Load()), logTag)
+			interrupted = true
+			break
+		}
+
+		n, readErr := r.Read(buf[pcmFrameRead:])
+		readBytes += n
+		pcmFrameRead += n
+
+		if readErr != nil {
+			if readErr == io.EOF {
+				slog.Info("read pcm stream EOF", logTag)
+			} else {
+				slog.Error(fmt.Sprintf("read pcm stream failed, err: %v", readErr), logTag)
+			}
+			break
+		}
+
+		if pcmFrameRead != pcmFrameSize {
+			slog.Debug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead), logTag)
+			continue
+		}
+
+		// send logs its own failures
+		p.send(tenEnv, buf)
+		// clear buf
+		buf = p.newBuf()
+		pcmFrameRead = 0
+		sentFrames++
+
+		if sentFrames == 1 {
+			firstFrameLatency = time.Since(startTime).Milliseconds()
+			slog.Info(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency), logTag)
+		}
+
+		slog.Debug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text), logTag)
+	}
+
+	// Send the trailing partial frame (the rest of buf is zero), unless the
+	// message was flushed: flushed audio must not reach the output.
+	if pcmFrameRead > 0 && !interrupted {
+		p.send(tenEnv, buf)
+		sentFrames++
+		slog.Info(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead), logTag)
+	}
+
+	slog.Info(fmt.Sprintf("send pcm data finished, text: [%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms",
+		msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds()), logTag)
+}
+
+// OnCmd receives cmd from ten graph.
+// current supported cmd:
+//   - name: flush
+//     example:
+//     {"name": "flush"}
+//
+// A flush records the current time in outdateTs, which makes the worker drop
+// every text received before it, and forwards a flush command downstream.
+// Any other command is acknowledged with an OK result and otherwise ignored.
+func (e *fishAudioTTSExtension) OnCmd(
+	tenEnv ten.TenEnv,
+	cmd ten.Cmd,
+) {
+	// replyError returns an error result for cmd to its sender.
+	replyError := func() {
+		cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError)
+		tenEnv.ReturnResult(cmdResult, cmd)
+	}
+
+	cmdName, err := cmd.GetName()
+	if err != nil {
+		slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag)
+		replyError()
+		return
+	}
+
+	// Log the command that was actually received, not the flush constant.
+	slog.Info(fmt.Sprintf("OnCmd %s", cmdName), logTag)
+
+	switch cmdName {
+	case cmdInFlush:
+		outdateTs.Store(time.Now().UnixMicro())
+
+		// send out
+		outCmd, err := ten.NewCmd(cmdOutFlush)
+		if err != nil {
+			slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag)
+			replyError()
+			return
+		}
+
+		if err := tenEnv.SendCmd(outCmd, nil); err != nil {
+			slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag)
+			replyError()
+			return
+		}
+		slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag)
+	}
+
+	cmdResult, _ := ten.NewCmdResult(ten.StatusCodeOk)
+	tenEnv.ReturnResult(cmdResult, cmd)
+}
+
+// OnData receives data from ten graph.
+// current supported data:
+//   - name: text_data
+//     example:
+//     {name: text_data, properties: {text: "hello"}}
+//
+// Non-empty text is queued on textChan from the calling goroutine, so texts
+// are synthesized in the order they arrive. The send never blocks: if the
+// queue is full, or was never created because OnStart failed, the text is
+// dropped and an error is logged.
+func (e *fishAudioTTSExtension) OnData(
+	tenEnv ten.TenEnv,
+	data ten.Data,
+) {
+	text, err := data.GetPropertyString(dataInTextDataPropertyText)
+	if err != nil {
+		slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag)
+		return
+	}
+
+	if len(text) == 0 {
+		slog.Debug("OnData text is empty, ignored", logTag)
+		return
+	}
+
+	slog.Info(fmt.Sprintf("OnData input text: [%s]", text), logTag)
+
+	// Stamp the message on receipt so that a later flush is compared against
+	// the real arrival time.
+	msg := &message{text: text, receivedTs: time.Now().UnixMicro()}
+
+	select {
+	case textChan <- msg:
+	default:
+		// A send on a nil channel can never proceed, so this branch also
+		// covers the case where OnStart did not create textChan.
+		slog.Error(fmt.Sprintf("OnData textChan is full or not initialized, dropped text: [%s]", text), logTag)
+	}
+}
+
+// init registers the extension addon under the name "fish_audio_tts".
+func init() {
+	slog.Info("fish_audio_tts extension init", logTag)
+
+	// Register addon
+	ten.RegisterAddonAsExtension(
+		"fish_audio_tts",
+		ten.NewDefaultExtensionAddon(newFishAudioTTSExtension),
+	)
+}
diff --git a/agents/ten_packages/extension/fish_audio_tts/go.mod b/agents/ten_packages/extension/fish_audio_tts/go.mod
new file mode 100644
index 00000000..de51965a
--- /dev/null
+++ b/agents/ten_packages/extension/fish_audio_tts/go.mod
@@ -0,0 +1,12 @@
+module fish_audio_tts
+
+go 1.20
+
+replace ten_framework => ../../system/ten_runtime_go/interface
+
+require (
+	github.com/vmihailenco/msgpack/v5 v5.4.1
+	ten_framework v0.0.0-00010101000000-000000000000
+)
+
+require github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
diff --git a/agents/ten_packages/extension/fish_audio_tts/go.sum b/agents/ten_packages/extension/fish_audio_tts/go.sum
new file mode 100644
index 00000000..9bb51844
--- /dev/null
+++ b/agents/ten_packages/extension/fish_audio_tts/go.sum
@@ -0,0 +1,8 @@
+github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
+github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8=
+github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= diff --git a/agents/ten_packages/extension/fish_audio_tts/manifest.json b/agents/ten_packages/extension/fish_audio_tts/manifest.json new file mode 100644 index 00000000..d717fd53 --- /dev/null +++ b/agents/ten_packages/extension/fish_audio_tts/manifest.json @@ -0,0 +1,56 @@ +{ + "type": "extension", + "name": "fish_audio_tts", + "version": "0.4.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_go", + "version": "0.2" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "model_id": { + "type": "string" + }, + "request_timeout_seconds": { + "type": "int64" + }, + "optimize_streaming_latency": { + "type": "bool" + }, + "base_url": { + "type": "string" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ], + "audio_frame_out": [ + { + "name": "pcm_frame" + } + ] + } +} \ No newline at end of file diff --git a/agents/ten_packages/extension/fish_audio_tts/pcm.go b/agents/ten_packages/extension/fish_audio_tts/pcm.go new file mode 100644 index 00000000..f78efdde --- /dev/null +++ b/agents/ten_packages/extension/fish_audio_tts/pcm.go @@ -0,0 +1,102 @@ +/** + * + * Agora Real Time Engagement + * Created by Hai Guo in 2024-08. + * Copyright (c) 2024 Agora IO. All rights reserved. 
+ *
+ */
+// An extension written by Go for TTS
+package extension
+
+import (
+	"fmt"
+	"log/slog"
+
+	"ten_framework/ten"
+)
+
+// pcm builds audio frames from raw PCM bytes and sends them to the ten graph.
+type pcm struct {
+	config *pcmConfig
+}
+
+// pcmConfig describes the layout of the audio frames produced by pcm.
+type pcmConfig struct {
+	BytesPerSample    int32
+	Channel           int32
+	ChannelLayout     uint64
+	Name              string
+	SampleRate        int32
+	SamplesPerChannel int32
+	Timestamp         int64
+}
+
+// defaultPcmConfig returns the layout used by this extension: frames named
+// "pcm_frame" holding 441 samples (44100 / 100) of 2 bytes each on one channel
+// at a 44100 sample rate, i.e. 882 bytes per frame.
+func defaultPcmConfig() *pcmConfig {
+	return &pcmConfig{
+		BytesPerSample:    2,
+		Channel:           1,
+		ChannelLayout:     1,
+		Name:              "pcm_frame",
+		SampleRate:        44100,
+		SamplesPerChannel: 44100 / 100,
+		Timestamp:         0,
+	}
+}
+
+// newPcm wraps config in a pcm; the pointer is stored, not copied.
+func newPcm(config *pcmConfig) *pcm {
+	return &pcm{
+		config: config,
+	}
+}
+
+// getPcmFrame creates an audio frame configured from p.config, allocates
+// getPcmFrameSize() bytes for it and copies buf into that buffer.
+// It returns the error of ten.NewAudioFrame or LockBuf when one of them fails.
+func (p *pcm) getPcmFrame(buf []byte) (pcmFrame ten.AudioFrame, err error) {
+	pcmFrame, err = ten.NewAudioFrame(p.config.Name)
+	if err != nil {
+		slog.Error(fmt.Sprintf("NewPcmFrame failed, err: %v", err), logTag)
+		return
+	}
+
+	// set pcm frame
+	// NOTE(review): results of the setters, AllocBuf and UnlockBuf are not
+	// inspected here - confirm whether these calls can fail.
+	pcmFrame.SetBytesPerSample(p.config.BytesPerSample)
+	pcmFrame.SetSampleRate(p.config.SampleRate)
+	pcmFrame.SetChannelLayout(p.config.ChannelLayout)
+	pcmFrame.SetNumberOfChannels(p.config.Channel)
+	pcmFrame.SetTimestamp(p.config.Timestamp)
+	pcmFrame.SetDataFmt(ten.AudioFrameDataFmtInterleave)
+	pcmFrame.SetSamplesPerChannel(p.config.SamplesPerChannel)
+	pcmFrame.AllocBuf(p.getPcmFrameSize())
+
+	borrowedBuf, err := pcmFrame.LockBuf()
+	if err != nil {
+		slog.Error(fmt.Sprintf("LockBuf failed, err: %v", err), logTag)
+		return
+	}
+
+	// copy data
+	copy(borrowedBuf, buf)
+
+	pcmFrame.UnlockBuf(&borrowedBuf)
+	return
+}
+
+// getPcmFrameSize returns the size of one frame in bytes:
+// SamplesPerChannel * Channel * BytesPerSample.
+func (p *pcm) getPcmFrameSize() int {
+	return int(p.config.SamplesPerChannel * p.config.Channel * p.config.BytesPerSample)
+}
+
+// newBuf allocates a zeroed buffer of exactly one frame.
+func (p *pcm) newBuf() []byte {
+	return make([]byte, p.getPcmFrameSize())
+}
+
+// send wraps buf in an audio frame and sends it through tenEnv.
+// Failures are logged and returned.
+func (p *pcm) send(tenEnv ten.TenEnv, buf []byte) (err error) {
+	pcmFrame, err := p.getPcmFrame(buf)
+	if err != nil {
+		slog.Error(fmt.Sprintf("getPcmFrame failed, err: %v", err), logTag)
+		return
+	}
+
+	// send pcm
+	if err = tenEnv.SendAudioFrame(pcmFrame); err != nil {
+		slog.Error(fmt.Sprintf("SendPcmFrame failed, err: %v", err), logTag)
+		return
+	}
+
+	return
+}
diff --git a/agents/ten_packages/extension/fish_audio_tts/property.json b/agents/ten_packages/extension/fish_audio_tts/property.json
new file mode 100644
index 00000000..9e26dfee
--- /dev/null
+++ b/agents/ten_packages/extension/fish_audio_tts/property.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file