Skip to content

Commit 4c29b23

Browse files
ghondar and HenryHengZJ authored
Feature/Add Groq Whisper support (#3706)
* feat: Add Groq Whisper support to SpeechToText component
  - Introduced a new speech-to-text provider, Groq Whisper, in both the backend and UI components.
  - Updated SpeechToTextType to include GROQ_WHISPER.
  - Implemented Groq client integration for audio transcription with customizable model, language, and temperature options.
  - Added UI elements for Groq Whisper configuration, including input fields for model, language, and temperature settings.
* Set the speech-to-text "none" status to false when another provider is selected

---------

Co-authored-by: Henry <[email protected]>
1 parent d549885 commit 4c29b23

File tree

3 files changed

+66
-2
lines changed

3 files changed

+66
-2
lines changed

packages/components/src/speechToText.ts

+20-1
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@ import { getCredentialData } from './utils'
33
import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai'
44
import { AssemblyAI } from 'assemblyai'
55
import { getFileFromStorage } from './storageUtils'
6+
import Groq from 'groq-sdk'
67

78
const SpeechToTextType = {
89
OPENAI_WHISPER: 'openAIWhisper',
910
ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
10-
LOCALAI_STT: 'localAISTT'
11+
LOCALAI_STT: 'localAISTT',
12+
GROQ_WHISPER: 'groqWhisper'
1113
}
1214

1315
export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
@@ -70,6 +72,23 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
7072
}
7173
break
7274
}
75+
case SpeechToTextType.GROQ_WHISPER: {
76+
const groqClient = new Groq({
77+
apiKey: credentialData.groqApiKey
78+
})
79+
const file = await toFile(audio_file, upload.name)
80+
const groqTranscription = await groqClient.audio.transcriptions.create({
81+
file,
82+
model: speechToTextConfig?.model || 'whisper-large-v3',
83+
language: speechToTextConfig?.language,
84+
temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
85+
response_format: 'verbose_json'
86+
})
87+
if (groqTranscription?.text) {
88+
return groqTranscription.text
89+
}
90+
break
91+
}
7392
}
7493
} else {
7594
throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')
1.73 KB
Loading

packages/ui/src/ui-component/extended/SpeechToText.jsx

+46-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import { Dropdown } from '@/ui-component/dropdown/Dropdown'
1717
import openAISVG from '@/assets/images/openai.svg'
1818
import assemblyAIPng from '@/assets/images/assemblyai.png'
1919
import localAiPng from '@/assets/images/localai.png'
20+
import groqPng from '@/assets/images/groq.png'
2021

2122
// store
2223
import useNotifier from '@/utils/useNotifier'
@@ -29,7 +30,8 @@ import chatflowsApi from '@/api/chatflows'
2930
const SpeechToTextType = {
3031
OPENAI_WHISPER: 'openAIWhisper',
3132
ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
32-
LOCALAI_STT: 'localAISTT'
33+
LOCALAI_STT: 'localAISTT',
34+
GROQ_WHISPER: 'groqWhisper'
3335
}
3436

3537
// Weird quirk - the key must match the name property value.
@@ -139,6 +141,46 @@ const speechToTextProviders = {
139141
optional: true
140142
}
141143
]
144+
},
145+
[SpeechToTextType.GROQ_WHISPER]: {
146+
label: 'Groq Whisper',
147+
name: SpeechToTextType.GROQ_WHISPER,
148+
icon: groqPng,
149+
url: 'https://console.groq.com/',
150+
inputs: [
151+
{
152+
label: 'Model',
153+
name: 'model',
154+
type: 'string',
155+
description: `The STT model to load. Defaults to whisper-large-v3 if left blank.`,
156+
placeholder: 'whisper-large-v3',
157+
optional: true
158+
},
159+
{
160+
label: 'Connect Credential',
161+
name: 'credential',
162+
type: 'credential',
163+
credentialNames: ['groqApi']
164+
},
165+
{
166+
label: 'Language',
167+
name: 'language',
168+
type: 'string',
169+
description:
170+
'The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.',
171+
placeholder: 'en',
172+
optional: true
173+
},
174+
{
175+
label: 'Temperature',
176+
name: 'temperature',
177+
type: 'number',
178+
step: 0.1,
179+
description:
180+
'The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.',
181+
optional: true
182+
}
183+
]
142184
}
143185
}
144186

@@ -210,6 +252,9 @@ const SpeechToText = ({ dialogProps }) => {
210252
newVal[provider.name] = { ...speechToText[provider.name], status: false }
211253
}
212254
})
255+
if (providerName !== 'none') {
256+
newVal['none'].status = false
257+
}
213258
}
214259
setSpeechToText(newVal)
215260
return newVal

0 commit comments

Comments
 (0)