From eb37a2fe996a7a57dab2ce19a832f63503412d20 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 6 Apr 2026 15:30:33 -0400 Subject: [PATCH 1/5] revert: disable Teleport feature flag by default (#23744) (#23815) --- meta/feature-flags/feature-flag-registry.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meta/feature-flags/feature-flag-registry.json b/meta/feature-flags/feature-flag-registry.json index 18cf644533..db808206ef 100644 --- a/meta/feature-flags/feature-flag-registry.json +++ b/meta/feature-flags/feature-flag-registry.json @@ -279,7 +279,7 @@ "key": "teleport", "label": "Teleport", "description": "Enable teleport UI in General settings for moving assistants between hosting environments", - "defaultEnabled": true + "defaultEnabled": false }, { "id": "permission-controls-v2", From 9e4561927739e6d24938e1900af5d53d5c6e3951 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 13:56:45 -0400 Subject: [PATCH 2/5] fix: replace auxWhite-on-primaryBase with VButton across the app (#23802) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: use VButton for inline surface action buttons Replace raw Button with manual color functions in InlineSurfaceRouter with the design system VButton component. The manual buttonForeground used VColor.auxWhite (always #FFFFFF) against VColor.primaryBase which resolves to #FDFDFC in dark mode, producing invisible white-on-white text. Closes LUM-730 Co-Authored-By: ashlee@vellum.ai * fix: replace auxWhite-on-primaryBase with VButton in additional locations FileUploadSurfaceView: Upload/Cancel buttons used raw Button with VColor.auxWhite on VColor.primaryBase — white-on-white in dark mode. Replaced with VButton(.primary) and VButton(.outlined). JITPermissionView: Permission buttons used the same auxWhite pattern. Replaced with VButton(.primary/.outlined, isFullWidth: true). ImproveExperienceStepView: ToS checkbox checkmark used auxWhite on primaryBase fill. Changed to VColor.contentInset which adapts per color scheme. ChatGallerySection: Gallery demo of surface action pills mirrored the old buggy pattern. Updated to use VButton so the gallery accurately represents production rendering. Co-Authored-By: ashlee@vellum.ai --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: ashlee@vellum.ai --- .../FirstMeeting/JITPermissionView.swift | 22 ++++--------- .../ImproveExperienceStepView.swift | 2 +- .../Gallery/Sections/ChatGallerySection.swift | 26 +++------------ .../InlineWidgets/InlineSurfaceRouter.swift | 33 +++++-------------- .../Surfaces/FileUploadSurfaceView.swift | 27 +++------------ 5 files changed, 24 insertions(+), 86 deletions(-) diff --git a/clients/macos/vellum-assistant/Features/Onboarding/FirstMeeting/JITPermissionView.swift b/clients/macos/vellum-assistant/Features/Onboarding/FirstMeeting/JITPermissionView.swift index 34f311e372..6c7cde1abc 100644 --- a/clients/macos/vellum-assistant/Features/Onboarding/FirstMeeting/JITPermissionView.swift +++ b/clients/macos/vellum-assistant/Features/Onboarding/FirstMeeting/JITPermissionView.swift @@ -185,22 +185,12 @@ struct JITPermissionView: View { @ViewBuilder private func permissionButton(_ title: String, isPrimary: Bool, action: @escaping () -> Void) -> some View { - Button(action: action) { - Text(title) - .font(VFont.labelDefault) - .foregroundStyle(isPrimary ? VColor.auxWhite : VColor.contentDefault.opacity(0.85)) - .frame(maxWidth: .infinity) - .padding(.horizontal, VSpacing.sm) - .padding(.vertical, VSpacing.sm + VSpacing.xxs) - .background(isPrimary ? AnyShapeStyle(VColor.primaryBase) : AnyShapeStyle(Color.clear)) - .clipShape(RoundedRectangle(cornerRadius: VRadius.md)) - .overlay( - RoundedRectangle(cornerRadius: VRadius.md) - .stroke(isPrimary ? Color.clear : VColor.contentDefault.opacity(0.2), lineWidth: 1) - ) - } - .buttonStyle(.plain) - .frame(maxWidth: .infinity) + VButton( + label: title, + style: isPrimary ? .primary : .outlined, + isFullWidth: true, + action: action + ) } private func dismiss() { diff --git a/clients/macos/vellum-assistant/Features/Onboarding/ImproveExperienceStepView.swift b/clients/macos/vellum-assistant/Features/Onboarding/ImproveExperienceStepView.swift index 3267802ba3..2fd84799df 100644 --- a/clients/macos/vellum-assistant/Features/Onboarding/ImproveExperienceStepView.swift +++ b/clients/macos/vellum-assistant/Features/Onboarding/ImproveExperienceStepView.swift @@ -107,7 +107,7 @@ struct ImproveExperienceStepView: View { if tosAccepted { VIconView(.check, size: 12) - .foregroundStyle(VColor.auxWhite) + .foregroundStyle(VColor.contentInset) } } .frame(width: 20, height: 20) diff --git a/clients/shared/DesignSystem/Gallery/Sections/ChatGallerySection.swift b/clients/shared/DesignSystem/Gallery/Sections/ChatGallerySection.swift index ba0f8b685c..a27a9ae0a8 100644 --- a/clients/shared/DesignSystem/Gallery/Sections/ChatGallerySection.swift +++ b/clients/shared/DesignSystem/Gallery/Sections/ChatGallerySection.swift @@ -525,30 +525,14 @@ struct ChatGallerySection: View { } private func surfaceActionPill(label: String, style: SurfaceActionStyle) -> some View { - Text(label) - .font(VFont.bodyMediumDefault) - .foregroundStyle(surfaceActionForeground(style)) - .padding(.horizontal, VSpacing.lg) - .padding(.vertical, VSpacing.sm) - .background( - RoundedRectangle(cornerRadius: VRadius.md) - .fill(surfaceActionBackground(style)) - ) + VButton(label: label, style: surfaceActionButtonStyle(style)) {} } - private func surfaceActionForeground(_ style: SurfaceActionStyle) -> Color { + private func surfaceActionButtonStyle(_ style: SurfaceActionStyle) -> VButton.Style { switch style { - case .primary: return VColor.auxWhite - case .destructive: return VColor.auxWhite - case .secondary: return VColor.contentDefault - } - } - - private func surfaceActionBackground(_ style: SurfaceActionStyle) -> Color { - switch style { - case .primary: return VColor.primaryBase - case .destructive: return VColor.systemNegativeStrong - case .secondary: return VColor.borderBase.opacity(0.5) + case .primary: return .primary + case .secondary: return .outlined + case .destructive: return .danger } } } diff --git a/clients/shared/Features/Chat/InlineWidgets/InlineSurfaceRouter.swift b/clients/shared/Features/Chat/InlineWidgets/InlineSurfaceRouter.swift index 495e9c858c..987e422b7b 100644 --- a/clients/shared/Features/Chat/InlineWidgets/InlineSurfaceRouter.swift +++ b/clients/shared/Features/Chat/InlineWidgets/InlineSurfaceRouter.swift @@ -358,9 +358,11 @@ public struct InlineSurfaceRouter: View { } else { VStack(alignment: .leading, spacing: VSpacing.sm) { ForEach(surface.actions, id: \.uniqueId) { action in - Button { + VButton( + label: action.label, + style: buttonStyle(for: action.style) + ) { clickedActionLabel = action.label - // Merge action.data (button payload) with selectionPayload (list selection) var merged = selectionPayload ?? [:] if let actionData = action.data { for (key, value) in actionData { @@ -368,36 +370,17 @@ public struct InlineSurfaceRouter: View { } } onAction(surface.id, action.id, merged.isEmpty ? nil : merged) - } label: { - Text(action.label) - .font(VFont.bodyMediumDefault) - .foregroundStyle(buttonForeground(action.style)) - .padding(.horizontal, VSpacing.lg) - .padding(.vertical, VSpacing.sm) - .background( - RoundedRectangle(cornerRadius: VRadius.md) - .fill(buttonBackground(action.style)) - ) } - .buttonStyle(.plain) } } } } - private func buttonForeground(_ style: SurfaceActionStyle) -> Color { - switch style { - case .primary: return VColor.auxWhite - case .destructive: return VColor.auxWhite - case .secondary: return VColor.contentDefault - } - } - - private func buttonBackground(_ style: SurfaceActionStyle) -> Color { + private func buttonStyle(for style: SurfaceActionStyle) -> VButton.Style { switch style { - case .primary: return VColor.primaryBase - case .destructive: return VColor.systemNegativeStrong - case .secondary: return VColor.borderBase.opacity(0.5) + case .primary: return .primary + case .secondary: return .outlined + case .destructive: return .danger } } diff --git a/clients/shared/Features/Surfaces/FileUploadSurfaceView.swift b/clients/shared/Features/Surfaces/FileUploadSurfaceView.swift index 2dc7a56abc..75b7b505be 100644 --- a/clients/shared/Features/Surfaces/FileUploadSurfaceView.swift +++ b/clients/shared/Features/Surfaces/FileUploadSurfaceView.swift @@ -93,32 +93,13 @@ public struct FileUploadSurfaceView: View { HStack(spacing: VSpacing.lg) { Spacer() - Button(action: { onCancel() }) { - Text("Cancel") - .font(VFont.labelDefault) - .foregroundStyle(VColor.contentSecondary) - .padding(.horizontal, VSpacing.lg) - .padding(.vertical, VSpacing.sm) - .overlay( - RoundedRectangle(cornerRadius: VRadius.sm) - .stroke(VColor.borderBase, lineWidth: 1) - ) + VButton(label: "Cancel", style: .outlined, size: .compact) { + onCancel() } - .buttonStyle(.plain) - Button(action: { submitFiles() }) { - Text("Upload") - .font(VFont.labelDefault) - .foregroundStyle(selectedFiles.isEmpty ? VColor.contentTertiary : VColor.auxWhite) - .padding(.horizontal, VSpacing.lg) - .padding(.vertical, VSpacing.sm) - .background( - RoundedRectangle(cornerRadius: VRadius.sm) - .fill(selectedFiles.isEmpty ? VColor.surfaceOverlay : VColor.primaryBase) - ) + VButton(label: "Upload", style: .primary, size: .compact, isDisabled: selectedFiles.isEmpty) { + submitFiles() } - .buttonStyle(.plain) - .disabled(selectedFiles.isEmpty) } } } From fcd93c40a2faa1aa51bcfd14e80980083d2ca1e9 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 16:00:57 -0400 Subject: [PATCH 3/5] Make dictation engine start non-blocking with audio route resilience (#23811) * Make dictation engine start non-blocking and improve audio resilience - Add installTapAndStartAsync to AudioEngineController for non-blocking engine start using Swift concurrency (withCheckedContinuation) - Extract installTapAndStartImpl to share logic between sync/async paths - Listen for AVAudioEngineConfigurationChange to re-prewarm inputNode after Bluetooth device connect/disconnect and AirPods mode switches - Restructure VoiceInputManager.beginRecording() to show recording UI and play activation chime immediately, then start engine async via Task - Move DictationContextCapture off the critical path: engine starts concurrently on its audio queue while context capture runs on main - Add SFSpeechRecognizer transient unavailability retry (recreate if isAvailable returns false after sleep/wake or heavy use) - Handle edge case where PTT is released before async engine start completes (stopRecordingForDictation cleans up directly) Co-Authored-By: tkheyfets * Tear down engine when async startup outlives recording session When PTT is released before installTapAndStartAsync completes, the isRecording guard now stops and removes the tap if the engine started successfully, preventing the mic path from staying alive with no active recording session. Co-Authored-By: tkheyfets * Add recording generation token and gate context capture on start success Co-Authored-By: tkheyfets * Guard stale teardown against active sessions and gate rewarm on mic auth Co-Authored-By: tkheyfets * Move context capture to Task.detached to avoid blocking main actor Co-Authored-By: tkheyfets --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: tkheyfets --- .../App/VoiceInputManager.swift | 230 ++++++++++++------ .../Voice/AudioEngineController.swift | 99 ++++++-- .../Features/Voice/OpenAIVoiceService.swift | 3 +- 3 files changed, 239 insertions(+), 93 deletions(-) diff --git a/clients/macos/vellum-assistant/App/VoiceInputManager.swift b/clients/macos/vellum-assistant/App/VoiceInputManager.swift index ce491b4562..84fc1d800c 100644 --- a/clients/macos/vellum-assistant/App/VoiceInputManager.swift +++ b/clients/macos/vellum-assistant/App/VoiceInputManager.swift @@ -82,6 +82,12 @@ final class VoiceInputManager { /// Guards against double-start/double-stop from rapid key events. private var isActivatorHeld = false + /// Monotonically increasing counter identifying the current recording + /// session. The async engine-start Task captures this value and checks + /// it after `await` — if it no longer matches, the completion belongs + /// to a stale session and is discarded. + private var recordingGeneration: UInt64 = 0 + /// Whether `start()` has been called (monitors are active). /// Used to guard against duplicate registration from deferred startup. private(set) var hasStarted = false @@ -110,7 +116,7 @@ final class VoiceInputManager { PTTActivator.cached } - private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US")) + private var speechRecognizer: SFSpeechRecognizer? = SFSpeechRecognizer(locale: Locale(identifier: "en-US")) private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? private var recognitionTask: SFSpeechRecognitionTask? private let engineController = AudioEngineController(label: "com.vellum.audioEngine.voiceInput") @@ -549,18 +555,32 @@ final class VoiceInputManager { holdTask = nil } - /// Capture frontmost app context (for dictation) and begin recording. + /// Start recording immediately for instant UI feedback, then capture + /// frontmost app context off the main actor. The engine starts + /// asynchronously on its audio queue while context capture runs on a + /// detached Task — both happen concurrently without blocking the main + /// actor, so key-up events are processed immediately. + /// /// When Vellum itself is the frontmost app, skip context capture so the /// transcription falls through to the conversation path (auto-submit to chat) /// instead of going through DictationTextInserter which would double-insert. private func captureContextAndBeginRecording() { + beginRecording() + guard isRecording else { return } if currentMode == .dictation { let isVellumFrontmost = NSWorkspace.shared.frontmostApplication?.bundleIdentifier == Bundle.main.bundleIdentifier if !isVellumFrontmost { - currentDictationContext = DictationContextCapture.capture() + let generation = recordingGeneration + Task.detached { [weak self] in + let context = DictationContextCapture.capture() + await MainActor.run { [weak self] in + guard let self else { return } + guard self.isRecording, self.recordingGeneration == generation else { return } + self.currentDictationContext = context + } + } } } - beginRecording() } /// Stop recording using the appropriate method for the current mode. @@ -592,6 +612,12 @@ final class VoiceInputManager { // MARK: - Recording private func beginRecording() { + // Recreate speech recognizer if transiently unavailable (e.g. after + // sleep/wake, heavy use, or audio route changes). + if speechRecognizer?.isAvailable != true { + log.warning("Speech recognizer unavailable — recreating") + speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US")) + } guard let speechRecognizer = speechRecognizer, speechRecognizer.isAvailable else { log.error("Speech recognizer not available") currentDictationContext = nil @@ -637,6 +663,11 @@ final class VoiceInputManager { return } + // Show recording state and play chime immediately for instant feedback. + // The audio engine starts asynchronously below — the user hears/sees the + // activation before the engine is ready, hiding hardware latency. + recordingGeneration &+= 1 + let generation = recordingGeneration isRecording = true onRecordingStateChanged?(true) if currentMode == .dictation { @@ -647,6 +678,7 @@ final class VoiceInputManager { } } log.info("Voice recording started") + VoiceFeedback.playActivationChime() let request = SFSpeechAudioBufferRecognitionRequest() request.shouldReportPartialResults = true @@ -655,87 +687,107 @@ final class VoiceInputManager { let ampState = amplitudeState ampState.reset() - // Atomically read the hardware format, install the tap, and start the - // engine in a single dispatch to the audio queue. This prevents the - // TOCTOU race where a format read via `inputNodeFormat()` becomes stale - // before a separate `installTap()` async block executes — which crashes - // with NSInternalInconsistencyException on first use after permission grant. - guard engineController.installTapAndStart( - bufferSize: 1024, - block: { [weak self] buffer, _ in - request.append(buffer) - - guard let channelData = buffer.floatChannelData else { return } - let frameLength = Int(buffer.frameLength) - guard frameLength > 0 else { return } - - let channelDataArray = Array(UnsafeBufferPointer(start: channelData[0], count: frameLength)) - let rawRMS = vDSP.rootMeanSquare(channelDataArray) - - let smoothed = 0.5 * rawRMS + 0.5 * ampState.previousSmoothed - ampState.previousSmoothed = smoothed - - // Scale amplitude to 0-1 range for waveform visualization. - // Speech RMS is typically 0.01-0.1; multiply to fill the visual range. - let scaled = min(smoothed * 14.0, 1.0) - - let now = CFAbsoluteTimeGetCurrent() - guard now - ampState.lastEmissionTime >= 0.033 else { return } - ampState.lastEmissionTime = now - - VoiceInputManager.amplitudeSubject.send(scaled) - DispatchQueue.main.async { [weak self] in - self?.onAmplitudeChanged?(scaled) - } + let tapBlock: AVAudioNodeTapBlock = { [weak self] buffer, _ in + request.append(buffer) + + guard let channelData = buffer.floatChannelData else { return } + let frameLength = Int(buffer.frameLength) + guard frameLength > 0 else { return } + + let channelDataArray = Array(UnsafeBufferPointer(start: channelData[0], count: frameLength)) + let rawRMS = vDSP.rootMeanSquare(channelDataArray) + + let smoothed = 0.5 * rawRMS + 0.5 * ampState.previousSmoothed + ampState.previousSmoothed = smoothed + + // Scale amplitude to 0-1 range for waveform visualization. + // Speech RMS is typically 0.01-0.1; multiply to fill the visual range. + let scaled = min(smoothed * 14.0, 1.0) + + let now = CFAbsoluteTimeGetCurrent() + guard now - ampState.lastEmissionTime >= 0.033 else { return } + ampState.lastEmissionTime = now + + VoiceInputManager.amplitudeSubject.send(scaled) + DispatchQueue.main.async { [weak self] in + self?.onAmplitudeChanged?(scaled) } - ) else { - log.error("Audio engine failed to start — invalid format or engine error") - isRecording = false - onRecordingStateChanged?(false) - currentDictationContext = nil - recognitionRequest = nil - overlayWindow.dismiss() - resetAudioEngine() - return } - hasInstalledTap = true - recognitionTask = speechRecognizer.recognitionTask(with: request) { [weak self] result, error in - Task { @MainActor in - guard let self = self else { return } - // Ignore late callbacks delivered after recording was stopped - // (e.g. endAudio() triggering a delayed isFinal via Task dispatch). - guard self.isRecording else { return } - - if let result = result { - let text = result.bestTranscription.formattedString - if result.isFinal { - log.info("Transcription: \(text, privacy: .public)") - if !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { - self.handleFinalTranscription(text) + // Start the audio engine asynchronously to avoid blocking the main + // thread during Bluetooth negotiation or hardware initialization. + // The recognition task is started in the completion after the engine + // is running. This eliminates the 2+ second main-thread stall that + // occurs with queue.sync when coreaudiod is contended. + Task { [weak self] in + guard let self else { return } + let success = await self.engineController.installTapAndStartAsync( + bufferSize: 1024, + block: tapBlock + ) + // Verify this completion belongs to the current recording session. + // A quick release/retry can cause session A's completion to arrive + // while session B is active — using the stale request would + // desynchronize recognitionTask/recognitionRequest ownership. + guard self.isRecording, self.recordingGeneration == generation else { + // Only tear down if no session is currently active. When a newer + // session is running (isRecording true, generation mismatch), + // it owns the engine — tearing down here would remove its tap. + if success, !self.isRecording { + self.engineController.stopAndRemoveTap() + log.info("Engine started for stale generation \(generation) — tore down (no active session)") + } else if success { + log.info("Stale generation \(generation) completed — skipping teardown, session \(self.recordingGeneration) owns engine") + } + return + } + guard success else { + log.error("Audio engine failed to start — invalid format or engine error") + self.isRecording = false + self.onRecordingStateChanged?(false) + self.currentDictationContext = nil + self.recognitionRequest = nil + self.overlayWindow.dismiss() + self.resetAudioEngine() + return + } + self.hasInstalledTap = true + + self.recognitionTask = speechRecognizer.recognitionTask(with: request) { [weak self] result, error in + Task { @MainActor in + guard let self = self else { return } + // Ignore late callbacks delivered after recording was stopped + // (e.g. endAudio() triggering a delayed isFinal via Task dispatch). + guard self.isRecording else { return } + + if let result = result { + let text = result.bestTranscription.formattedString + if result.isFinal { + log.info("Transcription: \(text, privacy: .public)") + if !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + self.handleFinalTranscription(text) + } else { + VoiceFeedback.playDeactivationChime() + } + self.recognitionTask = nil + self.stopRecording() } else { - VoiceFeedback.playDeactivationChime() + self.onPartialTranscription?(text) + if self.currentMode == .dictation { + self.overlayWindow.updatePartialTranscription(text) + } } + } + + if let error = error { + log.error("Recognition error: \(error.localizedDescription)") self.recognitionTask = nil + VoiceFeedback.playDeactivationChime() self.stopRecording() - } else { - self.onPartialTranscription?(text) - if self.currentMode == .dictation { - self.overlayWindow.updatePartialTranscription(text) - } } } - - if let error = error { - log.error("Recognition error: \(error.localizedDescription)") - self.recognitionTask = nil - VoiceFeedback.playDeactivationChime() - self.stopRecording() - } } } - - VoiceFeedback.playActivationChime() } // MARK: - Permission Prompt @@ -765,10 +817,19 @@ final class VoiceInputManager { log.info("Permissions granted — starting recording") prewarmEngine() + self.beginRecording() + guard self.isRecording else { return } if self.currentMode == .dictation { - self.currentDictationContext = DictationContextCapture.capture() + let generation = self.recordingGeneration + Task.detached { [weak self] in + let context = DictationContextCapture.capture() + await MainActor.run { [weak self] in + guard let self else { return } + guard self.isRecording, self.recordingGeneration == generation else { return } + self.currentDictationContext = context + } + } } - self.beginRecording() } @@ -844,6 +905,23 @@ final class VoiceInputManager { } hasInstalledTap = false + // If the recognition task hasn't been started yet (async engine start + // still in progress), there's no callback to deliver isFinal. + // Clean up directly instead of waiting for a callback that won't come. + guard recognitionTask != nil else { + log.info("Recognition task not yet started — cleaning up directly") + recognitionRequest = nil + isRecording = false + currentDictationContext = nil + activeOrigin = .hotkey + amplitudeState.reset() + Self.amplitudeSubject.send(0) + onAmplitudeChanged?(0) + overlayWindow.dismiss() + VoiceFeedback.playDeactivationChime() + return + } + // Signal end of audio — the recognizer will process remaining audio // and fire the callback with isFinal = true. recognitionRequest?.endAudio() diff --git a/clients/macos/vellum-assistant/Features/Voice/AudioEngineController.swift b/clients/macos/vellum-assistant/Features/Voice/AudioEngineController.swift index cfe6ed34f3..d0ecd229b3 100644 --- a/clients/macos/vellum-assistant/Features/Voice/AudioEngineController.swift +++ b/clients/macos/vellum-assistant/Features/Voice/AudioEngineController.swift @@ -1,4 +1,5 @@ import AVFoundation +import AVFAudio import os private let log = Logger(subsystem: Bundle.appBundleIdentifier, category: "AudioEngineController") @@ -16,14 +17,53 @@ private let log = Logger(subsystem: Bundle.appBundleIdentifier, category: "Audio /// ensure `prewarm()` has run first so `inputNode` is already initialized and /// sync calls complete in sub-milliseconds. /// +/// Listens for `AVAudioEngineConfigurationChange` notifications to re-warm +/// `inputNode` after audio route changes (Bluetooth connect/disconnect, +/// AirPods mode switch, USB mic plug/unplug). +/// /// See: https://developer.apple.com/documentation/avfaudio/avaudionode/1387122-installtap final class AudioEngineController: @unchecked Sendable { private let audioEngine = AVAudioEngine() private let queue: DispatchQueue + private var configChangeObserver: (any NSObjectProtocol)? init(label: String = "com.vellum.audioEngine") { self.queue = DispatchQueue(label: label, qos: .userInitiated) + observeConfigurationChanges() + } + + deinit { + if let observer = configChangeObserver { + NotificationCenter.default.removeObserver(observer) + } + } + + // MARK: - Configuration Change Monitoring + + /// Re-prewarm `inputNode` when the audio hardware configuration changes + /// (Bluetooth device connect/disconnect, USB mic plug/unplug, AirPods + /// mode switch). Keeps the cached inputNode format fresh so subsequent + /// `installTapAndStart` calls complete in sub-milliseconds. + /// + /// See: https://developer.apple.com/documentation/avfaudio/avaudioengine/1386063-configurationchangenotification + private func observeConfigurationChanges() { + configChangeObserver = NotificationCenter.default.addObserver( + forName: .AVAudioEngineConfigurationChange, + object: audioEngine, + queue: nil + ) { [weak self] _ in + guard let self else { return } + guard AVCaptureDevice.authorizationStatus(for: .audio) == .authorized else { + log.info("Audio configuration changed — skipping re-warm (mic not authorized)") + return + } + log.info("Audio configuration changed — re-warming inputNode") + self.queue.async { + let _ = self.audioEngine.inputNode + log.info("Audio engine re-warmed after configuration change") + } + } } // MARK: - Pre-warm @@ -94,25 +134,52 @@ final class AudioEngineController: @unchecked Sendable { block: @escaping AVAudioNodeTapBlock ) -> Bool { queue.sync { [self] in - let inputNode = audioEngine.inputNode - let format = inputNode.outputFormat(forBus: 0) - guard format.channelCount > 0, format.sampleRate > 0 else { - log.error("Invalid audio format — channels: \(format.channelCount), sampleRate: \(format.sampleRate)") - return false + installTapAndStartImpl(bufferSize: bufferSize, block: block) + } + } + + /// Non-blocking variant of `installTapAndStart` using Swift concurrency. + /// Dispatches to the audio queue asynchronously and returns the result via + /// async/await, keeping the caller's thread free during engine initialization. + /// + /// Use this for latency-sensitive flows (e.g. PTT dictation) where showing + /// immediate UI feedback before the engine is ready improves perceived + /// responsiveness. + func installTapAndStartAsync( + bufferSize: AVAudioFrameCount, + block: @escaping AVAudioNodeTapBlock + ) async -> Bool { + await withCheckedContinuation { continuation in + queue.async { [self] in + let success = installTapAndStartImpl(bufferSize: bufferSize, block: block) + continuation.resume(returning: success) } + } + } + /// Shared implementation for both sync and async tap+start paths. + private func installTapAndStartImpl( + bufferSize: AVAudioFrameCount, + block: @escaping AVAudioNodeTapBlock + ) -> Bool { + let inputNode = audioEngine.inputNode + let format = inputNode.outputFormat(forBus: 0) + guard format.channelCount > 0, format.sampleRate > 0 else { + log.error("Invalid audio format — channels: \(format.channelCount), sampleRate: \(format.sampleRate)") + return false + } + + inputNode.removeTap(onBus: 0) + inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: nil, block: block) + + audioEngine.prepare() + do { + try audioEngine.start() + return true + } catch { + log.error("Failed to start audio engine: \(error.localizedDescription)") inputNode.removeTap(onBus: 0) - inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: nil, block: block) - - audioEngine.prepare() - do { - try audioEngine.start() - return true - } catch { - log.error("Failed to start audio engine: \(error.localizedDescription)") - inputNode.removeTap(onBus: 0) - return false - } + return false } } diff --git a/clients/macos/vellum-assistant/Features/Voice/OpenAIVoiceService.swift b/clients/macos/vellum-assistant/Features/Voice/OpenAIVoiceService.swift index 3bb94bf899..50aa1b807e 100644 --- a/clients/macos/vellum-assistant/Features/Voice/OpenAIVoiceService.swift +++ b/clients/macos/vellum-assistant/Features/Voice/OpenAIVoiceService.swift @@ -154,7 +154,8 @@ final class OpenAIVoiceService: VoiceServiceProtocol { // Reuse existing SFSpeechRecognizer across turns to avoid OS resource // release delays that make isAvailable return false on the second turn. - if speechRecognizer == nil { + // Recreate if transiently unavailable (e.g. after sleep/wake or heavy use). + if speechRecognizer == nil || speechRecognizer?.isAvailable != true { speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US")) } guard let recognizer = speechRecognizer, recognizer.isAvailable else { From 323ddb41081b513d933392d5619bbef917d19538 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 16:20:46 -0400 Subject: [PATCH 4/5] [LUM-681] Fix audio tap format mismatch by resetting engine before installTap (#23766) After audio-route changes (Bluetooth, USB mic, AirPods mode switch), the format cached inside AVAudioInputNode diverges from the engine's actual hardware format. Both outputFormat(forBus:) and a nil format argument to installTap resolve to this stale value, causing: 'Failed to create tap due to format mismatch, ' Fix: call audioEngine.reset() before re-querying the format, then pass it explicitly to installTap. This forces the engine to discard its cached graph state and re-read the hardware, so the tap, node, and engine all agree. Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: tkheyfets --- .../Voice/AudioEngineController.swift | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/clients/macos/vellum-assistant/Features/Voice/AudioEngineController.swift b/clients/macos/vellum-assistant/Features/Voice/AudioEngineController.swift index d0ecd229b3..2cb90e63f1 100644 --- a/clients/macos/vellum-assistant/Features/Voice/AudioEngineController.swift +++ b/clients/macos/vellum-assistant/Features/Voice/AudioEngineController.swift @@ -110,20 +110,23 @@ final class AudioEngineController: @unchecked Sendable { // MARK: - Combined Operations - /// Atomically validates audio input, installs a tap with `nil` format, and - /// starts the engine in a single synchronous dispatch to the audio queue. + /// Atomically resets the engine, validates audio input, installs a tap + /// with the freshly-queried hardware format, and starts the engine in a + /// single synchronous dispatch to the audio queue. /// - /// Passing `nil` for `installTap`'s format parameter lets AVAudioEngine use - /// its own internal hardware format, which is always self-consistent. This - /// prevents `NSInternalInconsistencyException` crashes caused by - /// `format.sampleRate != hwFormat.sampleRate` — the cached format from - /// `outputFormat(forBus:)` can diverge from the engine's internal hardware - /// format after audio route changes (Bluetooth, USB mic, AirPods mode - /// switch), even within a single synchronous block. + /// After audio-route changes (Bluetooth, USB mic, AirPods mode switch) + /// the format cached inside `AVAudioInputNode` can diverge from the + /// engine's actual hardware format. Both `outputFormat(forBus:)` **and** + /// a `nil` format argument to `installTap` resolve to this stale value, + /// causing: /// - /// The format validation (channels > 0, sampleRate > 0) is kept as a - /// pre-check to detect "no audio input available" — but the validated format - /// is **not** forwarded to `installTap`. + /// "Failed to create tap due to format mismatch, + /// " + /// + /// Calling `audioEngine.reset()` before re-querying forces the engine to + /// discard its cached graph state and re-read the hardware on the next + /// access. The fresh format is then passed **explicitly** to `installTap` + /// so the tap, the node, and the engine all agree. /// /// Returns `true` on success, or `false` if no audio input is available or /// the engine fails to start. @@ -158,19 +161,30 @@ final class AudioEngineController: @unchecked Sendable { } /// Shared implementation for both sync and async tap+start paths. + /// + /// Stops, removes any existing tap, and resets the engine before querying + /// `outputFormat(forBus:)` so the returned format reflects the current + /// hardware — not a stale cache from a previous audio route. private func installTapAndStartImpl( bufferSize: AVAudioFrameCount, block: @escaping AVAudioNodeTapBlock ) -> Bool { let inputNode = audioEngine.inputNode + + // Stop, remove any existing tap, and reset the engine so that + // outputFormat(forBus:) returns a value consistent with the + // current hardware — not a stale cache from a previous route. + audioEngine.stop() + inputNode.removeTap(onBus: 0) + audioEngine.reset() + let format = inputNode.outputFormat(forBus: 0) guard format.channelCount > 0, format.sampleRate > 0 else { log.error("Invalid audio format — channels: \(format.channelCount), sampleRate: \(format.sampleRate)") return false } - inputNode.removeTap(onBus: 0) - inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: nil, block: block) + inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: format, block: block) audioEngine.prepare() do { From 6a426cb0344bff9bc81a93cb94d54859720d224e Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 6 Apr 2026 17:09:32 -0400 Subject: [PATCH 5/5] fix: pass transport hints through HTTP message endpoint for managed-mode conversations (#23824) * fix: pass transport metadata through POST /v1/messages to enable host environment hints The HTTP message handler auto-creates conversations without transport metadata, so applyTransportMetadata() returns early and host environment hints (hostHomeDir, hostUsername) are never injected into the LLM context. This causes the assistant to hallucinate the user's home directory path from their display name instead of using the actual macOS username. Thread transport metadata from the message request body through SendMessageDeps.getOrCreateConversation() to the daemon, and send hostHomeDir/hostUsername from the macOS client in every message request. Co-Authored-By: Claude Opus 4.6 (1M context) * refactor: replace dynamic imports with static type imports Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- assistant/src/daemon/lifecycle.ts | 4 ++-- assistant/src/daemon/server.ts | 3 ++- assistant/src/runtime/http-types.ts | 6 ++++- .../src/runtime/routes/conversation-routes.ts | 23 ++++++++++++++++++ clients/shared/Network/MessageClient.swift | 24 +++++++++++++++++++ 5 files changed, 56 insertions(+), 4 deletions(-) diff --git a/assistant/src/daemon/lifecycle.ts b/assistant/src/daemon/lifecycle.ts index 5f8e8e60bf..a6c03e3634 100644 --- a/assistant/src/daemon/lifecycle.ts +++ b/assistant/src/daemon/lifecycle.ts @@ -870,8 +870,8 @@ export async function runDaemon(): Promise { guardianFollowUpConversationGenerator: createGuardianFollowUpConversationGenerator(), sendMessageDeps: { - getOrCreateConversation: (conversationId) => - server.getConversationForMessages(conversationId), + getOrCreateConversation: (conversationId, options) => + server.getConversationForMessages(conversationId, options), assistantEventHub, resolveAttachments: (attachmentIds) => { const resolved = attachmentsStore.getAttachmentsByIds(attachmentIds, { diff --git a/assistant/src/daemon/server.ts b/assistant/src/daemon/server.ts index 38a63f14e0..a2f4c92788 100644 --- a/assistant/src/daemon/server.ts +++ b/assistant/src/daemon/server.ts @@ -1448,8 +1448,9 @@ export class DaemonServer { */ async getConversationForMessages( conversationId: string, + options?: ConversationCreateOptions, ): Promise { - return this.getOrCreateConversation(conversationId); + return this.getOrCreateConversation(conversationId, options); } /** diff --git a/assistant/src/runtime/http-types.ts b/assistant/src/runtime/http-types.ts index 0085b71683..823650ed9c 100644 --- a/assistant/src/runtime/http-types.ts +++ b/assistant/src/runtime/http-types.ts @@ -5,6 +5,7 @@ import type { ChannelId, InterfaceId } from "../channels/types.js"; import type { CesClient } from "../credential-execution/client.js"; import type { Conversation } from "../daemon/conversation.js"; import type { TrustContext } from "../daemon/conversation-runtime-assembly.js"; +import type { ConversationCreateOptions } from "../daemon/handlers/shared.js"; import type { SkillOperationContext } from "../daemon/handlers/skills.js"; import type { ServerMessage } from "../daemon/message-protocol.js"; import type { @@ -150,7 +151,10 @@ export type MessageProcessor = ( * Hub publishing wires outbound events to the SSE stream. */ export interface SendMessageDeps { - getOrCreateConversation: (conversationId: string) => Promise; + getOrCreateConversation: ( + conversationId: string, + options?: ConversationCreateOptions, + ) => Promise; assistantEventHub: AssistantEventHub; resolveAttachments: (attachmentIds: string[]) => Array<{ id: string; diff --git a/assistant/src/runtime/routes/conversation-routes.ts b/assistant/src/runtime/routes/conversation-routes.ts index 5e9e81a9c7..1bfb8b7ba5 100644 --- a/assistant/src/runtime/routes/conversation-routes.ts +++ b/assistant/src/runtime/routes/conversation-routes.ts @@ -38,6 +38,10 @@ import { HostBashProxy } from "../../daemon/host-bash-proxy.js"; import { HostCuProxy } from "../../daemon/host-cu-proxy.js"; import { HostFileProxy } from "../../daemon/host-file-proxy.js"; import type { ServerMessage } from "../../daemon/message-protocol.js"; +import type { + MacosTransportMetadata, + NonMacosTransportMetadata, +} from "../../daemon/message-types/conversations.js"; import type { HeartbeatService } from "../../heartbeat/heartbeat-service.js"; import * as attachmentsStore from "../../memory/attachments-store.js"; import { @@ -942,6 +946,8 @@ export async function handleSendMessage( conversationType?: string; automated?: boolean; bypassSecretCheck?: boolean; + hostHomeDir?: string; + hostUsername?: string; }; const { conversationKey, content, attachmentIds } = body; @@ -1045,8 +1051,25 @@ export async function handleSendMessage( conversationType, }); const smDeps = deps.sendMessageDeps; + + // Build transport metadata from the request so the daemon can inject + // host environment hints (home directory, username) into the LLM context. + const transport = + sourceInterface === "macos" + ? ({ + channelId: sourceChannel, + interfaceId: "macos" as const, + hostHomeDir: body.hostHomeDir, + hostUsername: body.hostUsername, + } satisfies MacosTransportMetadata) + : ({ + channelId: sourceChannel, + interfaceId: sourceInterface, + } satisfies NonMacosTransportMetadata); + const conversation = await smDeps.getOrCreateConversation( mapping.conversationId, + { transport }, ); // Resolve guardian context from the AuthContext's actorPrincipalId. diff --git a/clients/shared/Network/MessageClient.swift b/clients/shared/Network/MessageClient.swift index 6dd9b48f33..bb290a8894 100644 --- a/clients/shared/Network/MessageClient.swift +++ b/clients/shared/Network/MessageClient.swift @@ -44,6 +44,24 @@ public struct MessageClient: MessageClientProtocol { #endif } + /// The host home directory, populated automatically on macOS. + private static var hostHomeDir: String? { + #if os(macOS) + return NSHomeDirectory() + #else + return nil + #endif + } + + /// The host username, populated automatically on macOS. + private static var hostUsername: String? { + #if os(macOS) + return NSUserName() + #else + return nil + #endif + } + public func uploadAttachment(filename: String, mimeType: String, data: String, filePath: String? = nil) async -> AttachmentUploadResult { log.info("[send-pipeline] attachment upload start — filename=\(filename, privacy: .public), mimeType=\(mimeType, privacy: .public)") @@ -106,6 +124,12 @@ public struct MessageClient: MessageClientProtocol { if bypassSecretCheck == true { body["bypassSecretCheck"] = true } + if let hostHomeDir = Self.hostHomeDir { + body["hostHomeDir"] = hostHomeDir + } + if let hostUsername = Self.hostUsername { + body["hostUsername"] = hostUsername + } do { let response = try await GatewayHTTPClient.post(