Skip to content

Commit

Permalink
Bugfix/speech input on Safari/iOS (#1971)
Browse files Browse the repository at this point in the history
* debug to identify

* Safari sends audio file as mp4 and not webm

* Safari on iOS needs special handling

* lint fixes

* updated condition

* Remove unused import

---------

Co-authored-by: Ilango <[email protected]>
  • Loading branch information
vinodkiran and 0xi4o authored Apr 19, 2024
1 parent d1c8f7e commit 6bd8aae
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 5 deletions.
2 changes: 0 additions & 2 deletions packages/components/src/speechToText.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
const credentialId = speechToTextConfig.credentialId as string
const credentialData = await getCredentialData(credentialId ?? '', options)
const filePath = path.join(getStoragePath(), options.chatflowid, options.chatId, upload.name)

const audio_file = fs.createReadStream(filePath)

if (speechToTextConfig.name === 'openAIWhisper') {
const openAIClientOptions: ClientOptions = {
apiKey: credentialData.openAIApiKey
}
const openAIClient = new OpenAIClient(openAIClientOptions)

const transcription = await openAIClient.audio.transcriptions.create({
file: audio_file,
model: 'whisper-1',
Expand Down
3 changes: 2 additions & 1 deletion packages/server/src/utils/buildChatflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
fileUploads = incomingInput.uploads
for (let i = 0; i < fileUploads.length; i += 1) {
const upload = fileUploads[i]

if ((upload.type === 'file' || upload.type === 'audio') && upload.data) {
const filename = upload.name
const dir = path.join(getStoragePath(), chatflowid, chatId)
Expand All @@ -83,7 +84,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
}

// Run Speech to Text conversion
if (upload.mime === 'audio/webm') {
if (upload.mime === 'audio/webm' || upload.mime === 'audio/mp4') {
let speechToTextConfig: ICommonObject = {}
if (chatflow.speechToText) {
const speechToTextProviders = JSON.parse(chatflow.speechToText)
Expand Down
8 changes: 7 additions & 1 deletion packages/ui/src/views/chatmessage/ChatMessage.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,13 @@ export const ChatMessage = ({ open, chatflowid, isDialog, previews, setPreviews
}

const addRecordingToPreviews = (blob) => {
const mimeType = blob.type.substring(0, blob.type.indexOf(';'))
let mimeType = ''
const pos = blob.type.indexOf(';')
if (pos === -1) {
mimeType = blob.type
} else {
mimeType = blob.type.substring(0, pos)
}
// read blob and add to previews
const reader = new FileReader()
reader.readAsDataURL(blob)
Expand Down
9 changes: 8 additions & 1 deletion packages/ui/src/views/chatmessage/audio-recording.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* @fileoverview This file contains the API to handle audio recording.
* Originally from 'https://ralzohairi.medium.com/audio-recording-in-javascript-96eed45b75ee'
*/
import { isSafari } from 'react-device-detect'

// audio-recording.js ---------------
let microphoneButton, elapsedTimeTag
Expand Down Expand Up @@ -277,7 +278,13 @@ export const audioRecorder = {
})

//start the recording by calling the start method on the media recorder
audioRecorder.mediaRecorder.start()
if (isSafari) {
// https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/322252
// https://community.openai.com/t/whisper-api-cannot-read-files-correctly/93420/46
audioRecorder.mediaRecorder.start(1000)
} else {
audioRecorder.mediaRecorder.start()
}
})
)

Expand Down

0 comments on commit 6bd8aae

Please sign in to comment.