-
Notifications
You must be signed in to change notification settings - Fork 88
feat: add false positive handling and privacy safeguards for wake word (#7999) #8141
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
109 changes: 109 additions & 0 deletions
109
clients/macos/vellum-assistant/Features/Voice/WakeWord/WakeWordDismissHandler.swift
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| import Foundation | ||
| import Combine | ||
| import os | ||
|
|
||
| private let log = Logger(subsystem: "com.vellum.vellum-assistant", category: "WakeWordDismissHandler") | ||
|
|
||
| /// Handles dismissal of wake word activations: Escape key cancellation, | ||
| /// auto-cancel on silence, and cooldown to prevent rapid re-triggers. | ||
| @MainActor | ||
| final class WakeWordDismissHandler: ObservableObject { | ||
| /// Duration to wait for speech after wake word detection before auto-cancelling. | ||
| static let silenceTimeout: TimeInterval = 3.0 | ||
| /// Cooldown after a dismissal before accepting another wake word detection. | ||
| static let cooldownDuration: TimeInterval = 2.0 | ||
|
|
||
| @Published private(set) var isInCooldown = false | ||
|
|
||
| /// Total number of dismissals since app launch, useful for debugging. | ||
| private(set) var dismissCount = 0 | ||
|
|
||
| /// Called when the handler dismisses an activation (Escape, silence, or programmatic). | ||
| var onDismiss: (() -> Void)? | ||
|
|
||
| private var silenceTimer: Task<Void, Never>? | ||
| private var cooldownTimer: Task<Void, Never>? | ||
| /// Whether an activation is currently in progress and can be dismissed. | ||
| private var isActivationInProgress = false | ||
|
|
||
| // MARK: - Activation Lifecycle | ||
|
|
||
| /// Call when a wake word is detected and the system enters "activated" state. | ||
| /// Starts the silence timeout to auto-cancel if no speech is detected. | ||
| func activationStarted() { | ||
| guard !isInCooldown else { | ||
| log.debug("Ignoring activation during cooldown period") | ||
| return | ||
| } | ||
| isActivationInProgress = true | ||
| startSilenceTimer() | ||
| log.debug("Activation started, silence timer running") | ||
| } | ||
|
|
||
| /// Call when speech is detected after activation. Cancels the silence timeout | ||
| /// since the user is actively speaking. | ||
| func speechDetected() { | ||
| cancelSilenceTimer() | ||
| log.debug("Speech detected, silence timer cancelled") | ||
| } | ||
|
|
||
| /// Call when the activation completes normally (speech was processed). | ||
| /// No cooldown is applied for normal completions. | ||
| func activationCompleted() { | ||
| cancelSilenceTimer() | ||
| isActivationInProgress = false | ||
| log.debug("Activation completed normally") | ||
| } | ||
|
|
||
| /// Dismiss the current activation via Escape key or programmatic request. | ||
| func dismiss() { | ||
| guard isActivationInProgress else { return } | ||
| performDismiss(reason: "user dismiss (Escape)") | ||
| } | ||
|
|
||
| // MARK: - Silence Handling | ||
|
|
||
| private func startSilenceTimer() { | ||
| cancelSilenceTimer() | ||
| silenceTimer = Task { [weak self] in | ||
| try? await Task.sleep(nanoseconds: UInt64(Self.silenceTimeout * 1_000_000_000)) | ||
| guard !Task.isCancelled else { return } | ||
| self?.handleSilenceTimeout() | ||
| } | ||
| } | ||
|
|
||
| private func cancelSilenceTimer() { | ||
| silenceTimer?.cancel() | ||
| silenceTimer = nil | ||
| } | ||
|
|
||
| private func handleSilenceTimeout() { | ||
| guard isActivationInProgress else { return } | ||
| performDismiss(reason: "silence timeout (\(Self.silenceTimeout)s)") | ||
| } | ||
|
|
||
| // MARK: - Dismiss + Cooldown | ||
|
|
||
| private func performDismiss(reason: String) { | ||
| cancelSilenceTimer() | ||
| isActivationInProgress = false | ||
| dismissCount += 1 | ||
| log.info("Dismissed activation: \(reason) (total dismissals: \(self.dismissCount))") | ||
|
|
||
| onDismiss?() | ||
| startCooldown() | ||
| } | ||
|
|
||
| private func startCooldown() { | ||
| cooldownTimer?.cancel() | ||
| isInCooldown = true | ||
| log.debug("Cooldown started (\(Self.cooldownDuration)s)") | ||
|
|
||
| cooldownTimer = Task { [weak self] in | ||
| try? await Task.sleep(nanoseconds: UInt64(Self.cooldownDuration * 1_000_000_000)) | ||
| guard !Task.isCancelled else { return } | ||
| self?.isInCooldown = false | ||
| log.debug("Cooldown ended") | ||
| } | ||
| } | ||
| } | ||
115 changes: 115 additions & 0 deletions
115
clients/macos/vellum-assistant/Features/Voice/WakeWord/WakeWordErrorRecovery.swift
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| import Foundation | ||
| import Combine | ||
| import os | ||
|
|
||
| private let log = Logger(subsystem: "com.vellum.vellum-assistant", category: "WakeWordErrorRecovery") | ||
|
|
||
| /// Handles automatic recovery from wake word engine errors: auto-restart on | ||
| /// engine failures, pause/resume on mic unavailability, and retry limits. | ||
| @MainActor | ||
| final class WakeWordErrorRecovery: ObservableObject { | ||
| /// Delay before attempting to restart the engine after an error. | ||
| static let restartDelay: TimeInterval = 5.0 | ||
| /// Maximum consecutive retries before giving up. | ||
| static let maxRetries = 5 | ||
|
|
||
| @Published private(set) var hasGivenUp = false | ||
| @Published private(set) var consecutiveErrors = 0 | ||
|
|
||
| /// Called when the engine has exceeded max retries and will not attempt further restarts. | ||
| var onGaveUp: (() -> Void)? | ||
|
|
||
| /// Error history for debugging, stores timestamps and descriptions. | ||
| private(set) var errorHistory: [(date: Date, description: String)] = [] | ||
|
|
||
| private let engine: WakeWordEngine | ||
| private var restartTask: Task<Void, Never>? | ||
|
|
||
| init(engine: WakeWordEngine) { | ||
| self.engine = engine | ||
| } | ||
|
|
||
| // MARK: - Error Handling | ||
|
|
||
| /// Call when the wake word engine encounters an error. | ||
| func handleEngineError(_ error: Error) { | ||
| let description = error.localizedDescription | ||
| errorHistory.append((date: Date(), description: description)) | ||
| consecutiveErrors += 1 | ||
|
|
||
| log.error("Wake word engine error (\(self.consecutiveErrors)/\(Self.maxRetries)): \(description)") | ||
|
|
||
| if consecutiveErrors >= Self.maxRetries { | ||
| giveUp() | ||
| } else { | ||
| scheduleRestart() | ||
| } | ||
| } | ||
|
|
||
| /// Call when the microphone becomes unavailable (e.g., disconnected or claimed by another app). | ||
| func handleMicUnavailable() { | ||
| log.warning("Microphone unavailable, stopping engine") | ||
| engine.stop() | ||
| errorHistory.append((date: Date(), description: "Microphone unavailable")) | ||
|
alex-nork marked this conversation as resolved.
|
||
| } | ||
|
|
||
| /// Call when the microphone becomes available again. | ||
| func handleMicAvailable() { | ||
| guard !hasGivenUp else { | ||
| log.info("Mic available but engine has given up, not restarting") | ||
| return | ||
| } | ||
|
|
||
| log.info("Microphone available, restarting engine") | ||
| // Reset consecutive errors on mic restore since this is an external recovery | ||
| consecutiveErrors = 0 | ||
| attemptRestart() | ||
| } | ||
|
|
||
| /// Call when the engine starts successfully to reset the error counter. | ||
| func handleEngineStarted() { | ||
| consecutiveErrors = 0 | ||
| } | ||
|
|
||
| /// Reset the error state so the engine can be retried (e.g., after user intervention). | ||
| func reset() { | ||
| restartTask?.cancel() | ||
| restartTask = nil | ||
| consecutiveErrors = 0 | ||
| hasGivenUp = false | ||
| log.info("Error recovery state reset") | ||
| } | ||
|
|
||
| // MARK: - Internal | ||
|
|
||
| private func scheduleRestart() { | ||
| restartTask?.cancel() | ||
| log.info("Scheduling engine restart in \(Self.restartDelay)s") | ||
|
|
||
| restartTask = Task { [weak self] in | ||
| try? await Task.sleep(nanoseconds: UInt64(Self.restartDelay * 1_000_000_000)) | ||
| guard !Task.isCancelled else { return } | ||
| self?.attemptRestart() | ||
| } | ||
| } | ||
|
|
||
| private func attemptRestart() { | ||
| do { | ||
| try engine.start() | ||
| log.info("Engine restarted successfully") | ||
| consecutiveErrors = 0 | ||
| } catch { | ||
| log.error("Engine restart failed: \(error.localizedDescription)") | ||
| handleEngineError(error) | ||
| } | ||
| } | ||
|
|
||
| private func giveUp() { | ||
| restartTask?.cancel() | ||
| restartTask = nil | ||
| hasGivenUp = true | ||
| engine.stop() | ||
| log.error("Gave up restarting engine after \(Self.maxRetries) consecutive failures") | ||
| onGaveUp?() | ||
| } | ||
| } | ||
113 changes: 113 additions & 0 deletions
113
clients/macos/vellum-assistant/Features/Voice/WakeWord/WakeWordPrivacyGuard.swift
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| import Foundation | ||
| import Combine | ||
| import AppKit | ||
| import os | ||
|
|
||
| private let log = Logger(subsystem: "com.vellum.vellum-assistant", category: "WakeWordPrivacyGuard") | ||
|
|
||
| /// Monitors system state and automatically pauses/resumes wake word listening | ||
| /// for privacy. Pauses on screen lock, system sleep, and app termination; | ||
| /// resumes on unlock and wake. | ||
| @MainActor | ||
| final class WakeWordPrivacyGuard { | ||
| private let audioMonitor: AlwaysOnAudioMonitor | ||
| private var cancellables = Set<AnyCancellable>() | ||
| /// Whether listening was active before a privacy pause, so we only resume if it was. | ||
| private var wasListeningBeforePause = false | ||
|
|
||
| init(audioMonitor: AlwaysOnAudioMonitor) { | ||
| self.audioMonitor = audioMonitor | ||
| observeSystemEvents() | ||
| } | ||
|
|
||
| deinit { | ||
| // NotificationCenter observers are cleaned up via cancellables | ||
| } | ||
|
|
||
| // MARK: - System Event Observation | ||
|
|
||
| private func observeSystemEvents() { | ||
| let workspace = NSWorkspace.shared.notificationCenter | ||
|
|
||
| // Screen sleep (display off / screensaver) | ||
| workspace.publisher(for: NSWorkspace.screensDidSleepNotification) | ||
| .receive(on: DispatchQueue.main) | ||
| .sink { [weak self] _ in | ||
| self?.handlePrivacyPause(reason: "screen sleep") | ||
| } | ||
| .store(in: &cancellables) | ||
|
|
||
| // System sleep | ||
| workspace.publisher(for: NSWorkspace.willSleepNotification) | ||
| .receive(on: DispatchQueue.main) | ||
| .sink { [weak self] _ in | ||
| self?.handlePrivacyPause(reason: "system sleep") | ||
| } | ||
| .store(in: &cancellables) | ||
|
|
||
| // Screen wake | ||
| workspace.publisher(for: NSWorkspace.screensDidWakeNotification) | ||
| .receive(on: DispatchQueue.main) | ||
| .sink { [weak self] _ in | ||
| self?.handlePrivacyResume(reason: "screen wake") | ||
| } | ||
| .store(in: &cancellables) | ||
|
|
||
| // System wake | ||
| workspace.publisher(for: NSWorkspace.didWakeNotification) | ||
| .receive(on: DispatchQueue.main) | ||
| .sink { [weak self] _ in | ||
| self?.handlePrivacyResume(reason: "system wake") | ||
| } | ||
| .store(in: &cancellables) | ||
|
|
||
| // Screen lock via DistributedNotificationCenter | ||
| DistributedNotificationCenter.default().publisher( | ||
| for: Notification.Name("com.apple.screenIsLocked") | ||
| ) | ||
| .receive(on: DispatchQueue.main) | ||
| .sink { [weak self] _ in | ||
| self?.handlePrivacyPause(reason: "screen lock") | ||
| } | ||
| .store(in: &cancellables) | ||
|
|
||
| // Screen unlock via DistributedNotificationCenter | ||
| DistributedNotificationCenter.default().publisher( | ||
| for: Notification.Name("com.apple.screenIsUnlocked") | ||
| ) | ||
| .receive(on: DispatchQueue.main) | ||
| .sink { [weak self] _ in | ||
| self?.handlePrivacyResume(reason: "screen unlock") | ||
| } | ||
| .store(in: &cancellables) | ||
|
|
||
| // App termination | ||
| NotificationCenter.default.publisher(for: NSApplication.willTerminateNotification) | ||
| .receive(on: DispatchQueue.main) | ||
| .sink { [weak self] _ in | ||
| self?.handleTermination() | ||
| } | ||
| .store(in: &cancellables) | ||
| } | ||
|
|
||
| // MARK: - Privacy Actions | ||
|
|
||
| private func handlePrivacyPause(reason: String) { | ||
| guard audioMonitor.isListening else { return } | ||
| wasListeningBeforePause = true | ||
| audioMonitor.stopMonitoring() | ||
| log.info("Paused wake word listening: \(reason)") | ||
| } | ||
|
|
||
| private func handlePrivacyResume(reason: String) { | ||
| guard wasListeningBeforePause else { return } | ||
| wasListeningBeforePause = false | ||
| audioMonitor.startMonitoring() | ||
| log.info("Resumed wake word listening: \(reason)") | ||
| } | ||
|
|
||
| private func handleTermination() { | ||
| audioMonitor.stopMonitoring() | ||
| log.info("Stopped wake word listening: app terminating") | ||
| } | ||
| } |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.