From 84cad6a99fea084e8ddd20ee618e09b539de05ad Mon Sep 17 00:00:00 2001 From: tulsi Date: Wed, 15 Apr 2026 13:54:07 -0700 Subject: [PATCH 01/30] feat: wire up voice dictation in goose2 via ACP Add voice dictation support to the goose2 Tauri app by exposing transcription and config as ACP custom methods, then wiring the frontend to use them. Backend (crates/): - Add DictationTranscribeRequest/Response and DictationConfigRequest/Response types to goose-sdk custom_requests.rs with model metadata fields - Add #[custom_method] handlers in goose-acp server.rs for transcribe (OpenAI, Groq, ElevenLabs, Local) and config - Register methods in acp-meta.json - Forward local-inference feature from goose-cli to goose-acp Tauri (ui/goose2/src-tauri/): - Rewrite dictation.rs to use call_ext_method via ACP instead of importing goose crate directly - Add generic CallExt command to ACP manager with method name normalization (strips leading _ to avoid double-prefix) - Register get_dictation_config and transcribe_dictation commands Frontend (ui/goose2/src/): - Wire useDictationRecorder + useVoiceInputPreferences into ChatInput - Replace placeholder mic button with working toggle (recording/ transcribing states, auto-submit on keyword) - Stop recording on manual send and on auto-submit keyword - Show "Listening..."/"Transcribing..." placeholder in textarea - Add Voice section to SettingsModal with VoiceInputSettings - Add all voice i18n strings (en + es) - Fix pre-existing type errors in dictationVad.ts and VoiceInputSettings Known issue: Local Whisper reports configured: false despite model being downloaded and config set. The is_downloaded() path check needs investigation in a follow-up. Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 1 + crates/goose-acp/Cargo.toml | 2 + crates/goose-acp/acp-meta.json | 10 + crates/goose-acp/src/server.rs | 201 ++++++++ crates/goose-cli/Cargo.toml | 2 +- crates/goose-sdk/src/custom_requests.rs | 60 +++ ui/goose2/src-tauri/Info.plist | 8 + .../src-tauri/src/services/provider_defs.rs | 11 + .../chat/hooks/useDictationRecorder.ts | 317 ++++++++++++ .../features/chat/hooks/useVoiceDictation.ts | 117 +++++ .../chat/hooks/useVoiceInputPreferences.ts | 161 ++++++ .../features/chat/lib/dictationVad.test.ts | 51 ++ .../src/features/chat/lib/dictationVad.ts | 147 ++++++ .../src/features/chat/lib/voiceInput.test.ts | 85 ++++ ui/goose2/src/features/chat/lib/voiceInput.ts | 177 +++++++ ui/goose2/src/features/chat/ui/ChatInput.tsx | 38 +- .../src/features/chat/ui/ChatInputToolbar.tsx | 33 +- .../features/settings/ui/SettingsModal.tsx | 4 + .../settings/ui/VoiceInputSettings.tsx | 465 ++++++++++++++++++ ui/goose2/src/shared/api/dictation.ts | 100 ++++ .../src/shared/i18n/locales/en/chat.json | 6 +- .../src/shared/i18n/locales/en/settings.json | 46 +- .../src/shared/i18n/locales/es/chat.json | 6 +- .../src/shared/i18n/locales/es/settings.json | 46 +- ui/goose2/src/shared/types/dictation.ts | 51 ++ 25 files changed, 2134 insertions(+), 11 deletions(-) create mode 100644 ui/goose2/src-tauri/Info.plist create mode 100644 ui/goose2/src/features/chat/hooks/useDictationRecorder.ts create mode 100644 ui/goose2/src/features/chat/hooks/useVoiceDictation.ts create mode 100644 ui/goose2/src/features/chat/hooks/useVoiceInputPreferences.ts create mode 100644 ui/goose2/src/features/chat/lib/dictationVad.test.ts create mode 100644 ui/goose2/src/features/chat/lib/dictationVad.ts create mode 100644 ui/goose2/src/features/chat/lib/voiceInput.test.ts create mode 100644 
ui/goose2/src/features/chat/lib/voiceInput.ts create mode 100644 ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx create mode 100644 ui/goose2/src/shared/api/dictation.ts create mode 100644 ui/goose2/src/shared/types/dictation.ts diff --git a/Cargo.lock b/Cargo.lock index 093e9658825e..e38f0b0f0aa3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4454,6 +4454,7 @@ dependencies = [ "async-stream", "async-trait", "axum", + "base64 0.22.1", "fs-err", "futures", "goose", diff --git a/crates/goose-acp/Cargo.toml b/crates/goose-acp/Cargo.toml index 8bc2b1e7eed5..a7200146b8a8 100644 --- a/crates/goose-acp/Cargo.toml +++ b/crates/goose-acp/Cargo.toml @@ -14,6 +14,7 @@ path = "src/bin/generate_acp_schema.rs" [features] default = ["code-mode", "rustls-tls"] code-mode = ["goose/code-mode"] +local-inference = ["goose/local-inference"] rustls-tls = ["goose/rustls-tls", "goose-mcp/rustls-tls"] native-tls = ["goose/native-tls", "goose-mcp/native-tls"] @@ -48,6 +49,7 @@ uuid = { workspace = true, features = ["v7"] } schemars = { workspace = true, features = ["derive"] } goose-acp-macros = { path = "../goose-acp-macros" } goose-sdk = { path = "../goose-sdk" } +base64 = { workspace = true } [dev-dependencies] async-trait = { workspace = true } diff --git a/crates/goose-acp/acp-meta.json b/crates/goose-acp/acp-meta.json index 944d227b663f..3cd63e5f726f 100644 --- a/crates/goose-acp/acp-meta.json +++ b/crates/goose-acp/acp-meta.json @@ -104,6 +104,16 @@ "method": "_goose/session/unarchive", "requestType": "UnarchiveSessionRequest", "responseType": "EmptyResponse" + }, + { + "method": "_goose/dictation/transcribe", + "requestType": "DictationTranscribeRequest", + "responseType": "DictationTranscribeResponse" + }, + { + "method": "_goose/dictation/config", + "requestType": "DictationConfigRequest", + "responseType": "DictationConfigResponse" } ] } diff --git a/crates/goose-acp/src/server.rs b/crates/goose-acp/src/server.rs index d1a8212c7507..6176b02f8f85 100644 --- a/crates/goose-acp/src/server.rs +++ b/crates/goose-acp/src/server.rs @@ -16,6 +16,13 @@ use goose::config::paths::Paths; use goose::config::permission::PermissionManager; use goose::config::{Config, GooseMode}; use goose::conversation::message::{ActionRequiredData, Message, MessageContent}; +#[cfg(feature = "local-inference")] +use goose::dictation::providers::transcribe_local; +use goose::dictation::providers::{ + all_providers, is_configured, transcribe_with_provider, DictationProvider, +}; +#[cfg(feature = "local-inference")] +use goose::dictation::whisper; use goose::mcp_utils::ToolResult; use goose::permission::permission_confirmation::PrincipalType; use goose::permission::{Permission, PermissionConfirmation}; @@ -68,6 +75,9 @@ pub type AcpProviderFactory = Arc< const DEFAULT_PROVIDER_ID: &str = "goose"; const DEFAULT_PROVIDER_LABEL: &str = "Goose (Default)"; +const OPENAI_TRANSCRIPTION_MODEL: &str = "whisper-1"; +const GROQ_TRANSCRIPTION_MODEL: &str = "whisper-large-v3-turbo"; +const ELEVENLABS_TRANSCRIPTION_MODEL: &str = "scribe_v1"; /// In-memory state for an active ACP session. 
///
@@ -2904,6 +2914,197 @@ impl GooseAcpAgent {
         .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?;
         Ok(EmptyResponse {})
     }
+
+    #[custom_method(DictationTranscribeRequest)]
+    async fn on_dictation_transcribe(
+        &self,
+        req: DictationTranscribeRequest,
+    ) -> Result<DictationTranscribeResponse, sacp::Error> {
+        use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
+
+        let provider: DictationProvider = serde_json::from_value(serde_json::Value::String(
+            req.provider.clone(),
+        ))
+        .map_err(|_| {
+            sacp::Error::invalid_params().data(format!("Unknown provider: {}", req.provider))
+        })?;
+
+        let audio_bytes = BASE64
+            .decode(&req.audio)
+            .map_err(|_| sacp::Error::invalid_params().data("Invalid base64 audio data"))?;
+
+        if audio_bytes.len() > 50 * 1024 * 1024 {
+            return Err(sacp::Error::invalid_params().data("Audio too large (max 50MB)"));
+        }
+
+        let extension = match req.mime_type.as_str() {
+            "audio/webm" | "audio/webm;codecs=opus" => "webm",
+            "audio/mp4" => "mp4",
+            "audio/mpeg" | "audio/mpga" => "mp3",
+            "audio/m4a" => "m4a",
+            "audio/wav" | "audio/x-wav" => "wav",
+            other => {
+                return Err(
+                    sacp::Error::invalid_params().data(format!("Unsupported format: {other}"))
+                )
+            }
+        };
+
+        let text = match provider {
+            DictationProvider::OpenAI => {
+                transcribe_with_provider(
+                    DictationProvider::OpenAI,
+                    "model".to_string(),
+                    OPENAI_TRANSCRIPTION_MODEL.to_string(),
+                    audio_bytes,
+                    extension,
+                    &req.mime_type,
+                )
+                .await
+            }
+            DictationProvider::Groq => {
+                transcribe_with_provider(
+                    DictationProvider::Groq,
+                    "model".to_string(),
+                    GROQ_TRANSCRIPTION_MODEL.to_string(),
+                    audio_bytes,
+                    extension,
+                    &req.mime_type,
+                )
+                .await
+            }
+            DictationProvider::ElevenLabs => {
+                transcribe_with_provider(
+                    DictationProvider::ElevenLabs,
+                    "model_id".to_string(),
+                    ELEVENLABS_TRANSCRIPTION_MODEL.to_string(),
+                    audio_bytes,
+                    extension,
+                    &req.mime_type,
+                )
+                .await
+            }
+            #[cfg(feature = "local-inference")]
+            DictationProvider::Local => transcribe_local(audio_bytes).await,
+            #[cfg(not(feature = "local-inference"))]
+            DictationProvider::Local => {
+                return Err(sacp::Error::invalid_params()
+                    .data("Local inference is not available in this build"));
+            }
+        }
+        .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?;
+
+        Ok(DictationTranscribeResponse { text })
+    }
+
+    #[custom_method(DictationConfigRequest)]
+    async fn on_dictation_config(
+        &self,
+        _req: DictationConfigRequest,
+    ) -> Result<DictationConfigResponse, sacp::Error> {
+        let config = goose::config::Config::global();
+        let mut providers = std::collections::HashMap::new();
+
+        for def in all_providers() {
+            let provider = def.provider;
+            let host = if let Some(host_key) = def.host_key {
+                config
+                    .get(host_key, false)
+                    .ok()
+                    .and_then(|v| v.as_str().map(|s| s.to_string()))
+            } else {
+                None
+            };
+
+            let provider_key = serde_json::to_value(provider)
+                .ok()
+                .and_then(|v| v.as_str().map(|s| s.to_string()))
+                .unwrap_or_else(|| format!("{:?}", provider).to_lowercase());
+            providers.insert(
+                provider_key,
+                DictationProviderStatusEntry {
+                    configured: is_configured(provider),
+                    host,
+                    description: def.description.to_string(),
+                    uses_provider_config: def.uses_provider_config,
+                    settings_path: def.settings_path.map(|s| s.to_string()),
+                    config_key: if !def.uses_provider_config {
+                        Some(def.config_key.to_string())
+                    } else {
+                        None
+                    },
+                    model_config_key: dictation_model_config_key(provider),
+                    default_model: dictation_default_model(provider),
+                    selected_model: dictation_selected_model(&config, provider),
+                    available_models: dictation_available_models(provider),
+                },
+            );
+        }
+
+        Ok(DictationConfigResponse { providers })
+    }
+}
+
+fn dictation_model_config_key(provider: DictationProvider) -> Option<String> {
+    #[cfg(feature = "local-inference")]
+    if provider == DictationProvider::Local {
+        return Some(whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY.to_string());
+    }
+
+    None
+}
+
+fn dictation_default_model(provider: DictationProvider) -> Option<String> {
+    match provider {
+        DictationProvider::OpenAI => Some(OPENAI_TRANSCRIPTION_MODEL.to_string()),
+        DictationProvider::Groq => Some(GROQ_TRANSCRIPTION_MODEL.to_string()),
+        DictationProvider::ElevenLabs => Some(ELEVENLABS_TRANSCRIPTION_MODEL.to_string()),
+        #[cfg(feature = "local-inference")]
+        DictationProvider::Local => Some(whisper::recommend_model().to_string()),
+        #[cfg(not(feature = "local-inference"))]
+        DictationProvider::Local => None,
+    }
+}
+
+fn dictation_selected_model(config: &Config, provider: DictationProvider) -> Option<String> {
+    #[cfg(feature = "local-inference")]
+    if provider == DictationProvider::Local {
+        return config
+            .get(whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY, false)
+            .ok()
+            .and_then(|value| value.as_str().map(str::to_owned))
+            .filter(|model_id| whisper::get_model(model_id).is_some())
+            .or_else(|| dictation_default_model(provider));
+    }
+
+    dictation_default_model(provider)
+}
+
+fn dictation_available_models(provider: DictationProvider) -> Vec<DictationModelOption> {
+    match provider {
+        DictationProvider::OpenAI => vec![DictationModelOption {
+            id: OPENAI_TRANSCRIPTION_MODEL.to_string(),
+            label: "Whisper-1".to_string(),
+            description: "OpenAI's hosted Whisper transcription model.".to_string(),
+        }],
+        DictationProvider::Groq => vec![DictationModelOption {
+            id: GROQ_TRANSCRIPTION_MODEL.to_string(),
+            label: "Whisper Large V3 Turbo".to_string(),
+            description: "Groq's fast hosted Whisper transcription model.".to_string(),
+        }],
+        DictationProvider::ElevenLabs => vec![DictationModelOption {
+            id: ELEVENLABS_TRANSCRIPTION_MODEL.to_string(),
+            label: "Scribe v1".to_string(),
+            description: "ElevenLabs' hosted speech-to-text model.".to_string(),
+        }],
+        #[cfg(feature = "local-inference")]
+        DictationProvider::Local => whisper::available_models()
+            .iter()
+            .map(|model| DictationModelOption {
+                id: model.id.to_string(),
+                label: model.id.to_string(),
+                description: model.description.to_string(),
+            })
+            .collect(),
+        #[cfg(not(feature = "local-inference"))]
+        DictationProvider::Local => Vec::new(),
+    }
 }
 
 pub struct GooseAcpHandler {
diff --git a/crates/goose-cli/Cargo.toml b/crates/goose-cli/Cargo.toml
index 6c20a644912a..369cd59606cb 100644
--- a/crates/goose-cli/Cargo.toml
+++ b/crates/goose-cli/Cargo.toml
@@ -71,7 +71,7 @@ winapi = { workspace = true }
 [features]
 default = ["code-mode", "local-inference", "aws-providers", "telemetry", "otel", "rustls-tls"]
 code-mode = ["goose/code-mode", "goose-acp/code-mode"]
-local-inference = ["goose/local-inference"]
+local-inference = ["goose/local-inference", "goose-acp/local-inference"]
 aws-providers = ["goose/aws-providers"]
 cuda = ["goose/cuda", "local-inference"]
 telemetry = ["goose/telemetry"]
diff --git a/crates/goose-sdk/src/custom_requests.rs b/crates/goose-sdk/src/custom_requests.rs
index bbc375be09f3..46359100a3bf 100644
--- a/crates/goose-sdk/src/custom_requests.rs
+++ b/crates/goose-sdk/src/custom_requests.rs
@@ -309,6 +309,66 @@ pub struct ProviderConfigKey {
     pub primary: bool,
 }
 
+/// Transcribe audio via a dictation provider.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/transcribe", response = DictationTranscribeResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationTranscribeRequest {
+    /// Base64-encoded audio data
+    pub audio: String,
+    /// MIME type (e.g. "audio/wav", "audio/webm")
"audio/wav", "audio/webm") + pub mime_type: String, + /// Provider to use: "openai", "groq", "elevenlabs", or "local" + pub provider: String, +} + +/// Transcription result. +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)] +pub struct DictationTranscribeResponse { + pub text: String, +} + +/// Get the configuration status of all dictation providers. +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)] +#[request(method = "_goose/dictation/config", response = DictationConfigResponse)] +pub struct DictationConfigRequest {} + +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)] +pub struct DictationModelOption { + pub id: String, + pub label: String, + pub description: String, +} + +/// Per-provider configuration status. +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct DictationProviderStatusEntry { + pub configured: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub host: Option, + pub description: String, + pub uses_provider_config: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub settings_path: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub config_key: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub model_config_key: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub default_model: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub selected_model: Option, + #[serde(default)] + pub available_models: Vec, +} + +/// Dictation config response — map of provider name to status. +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)] +pub struct DictationConfigResponse { + pub providers: HashMap, +} + /// Empty success response for operations that return no data. #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)] pub struct EmptyResponse {} diff --git a/ui/goose2/src-tauri/Info.plist b/ui/goose2/src-tauri/Info.plist new file mode 100644 index 000000000000..8588d2d741c4 --- /dev/null +++ b/ui/goose2/src-tauri/Info.plist @@ -0,0 +1,8 @@ + + + + + NSMicrophoneUsageDescription + Goose uses your microphone to capture voice input for dictation. 
diff --git a/ui/goose2/src-tauri/Info.plist b/ui/goose2/src-tauri/Info.plist
new file mode 100644
index 000000000000..8588d2d741c4
--- /dev/null
+++ b/ui/goose2/src-tauri/Info.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>NSMicrophoneUsageDescription</key>
+  <string>Goose uses your microphone to capture voice input for dictation.</string>
+</dict>
+</plist>
diff --git a/ui/goose2/src-tauri/src/services/provider_defs.rs b/ui/goose2/src-tauri/src/services/provider_defs.rs
index 0a2a326eaf00..5eea0c0a5a64 100644
--- a/ui/goose2/src-tauri/src/services/provider_defs.rs
+++ b/ui/goose2/src-tauri/src/services/provider_defs.rs
@@ -125,6 +125,17 @@ pub(crate) static PROVIDER_CONFIG_DEFS: &[ProviderConfigDef] = &[
         keys: &[],
         oauth_cache_path: None,
     },
+    // Dictation providers (voice input)
+    ProviderConfigDef {
+        id: "dictation_groq",
+        keys: &[key("GROQ_API_KEY", true, true)],
+        oauth_cache_path: None,
+    },
+    ProviderConfigDef {
+        id: "dictation_elevenlabs",
+        keys: &[key("ELEVENLABS_API_KEY", true, true)],
+        oauth_cache_path: None,
+    },
 ];
 
 pub(crate) fn find_config_key(key_name: &str) -> Option<&'static ConfigKey> {
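The recorder hook added below does not stream audio: it buffers VAD-approved frames, merges them into one utterance, WAV-encodes at 16 kHz, and ships a single base64 chunk per flush. A condensed sketch of that flush path; `encodeWavSketch` is a stand-in for the patch's own `encodeWav`, and `transcribeDictation` is the real API wrapper added later in this patch:

```ts
import { transcribeDictation } from "@/shared/api/dictation";

// Stand-in for the patch's encodeWav (16-bit PCM, mono WAV container).
declare function encodeWavSketch(
  samples: Float32Array,
  sampleRate: number,
): ArrayBuffer;

async function flushChunk(
  chunks: Float32Array[],
  provider: "openai" | "groq" | "elevenlabs" | "local",
): Promise<string> {
  // Merge the VAD-approved frames into one contiguous buffer.
  const total = chunks.reduce((n, c) => n + c.length, 0);
  const merged = new Float32Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    merged.set(chunk, offset);
    offset += chunk.length;
  }

  // WAV-encode at the capture rate, then base64 via a data URL.
  const blob = new Blob([encodeWavSketch(merged, 16000)], {
    type: "audio/wav",
  });
  const base64 = await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    // Data URL looks like "data:audio/wav;base64,XXXX"; keep only the payload.
    reader.onloadend = () => resolve(String(reader.result).split(",")[1] ?? "");
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(blob);
  });

  const response = await transcribeDictation({
    audio: base64,
    mimeType: "audio/wav",
    provider,
  });
  return response.text;
}
```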
""); + reader.onerror = () => reject(reader.error); + reader.readAsDataURL(blob); + }); +} + +function toErrorMessage(error: unknown) { + if (error instanceof Error && error.message) { + return error.message; + } + + return "Voice input failed"; +} + +export function useDictationRecorder({ + provider, + providerConfigured, + preferredMicrophoneId, + onError, + onTranscription, +}: UseDictationRecorderOptions) { + const [isRecording, setIsRecording] = useState(false); + const [isTranscribing, setIsTranscribing] = useState(false); + const audioContextRef = useRef(null); + const processorRef = useRef(null); + const sourceRef = useRef(null); + const streamRef = useRef(null); + const samplesRef = useRef([]); + const vadStateRef = useRef(createInitialVadState()); + const pendingTranscriptionsRef = useRef(0); + const generationRef = useRef(0); + const providerRef = useRef(provider); + providerRef.current = provider; + const onErrorRef = useRef(onError); + onErrorRef.current = onError; + const onTranscriptionRef = useRef(onTranscription); + onTranscriptionRef.current = onTranscription; + + const isEnabled = Boolean(provider && providerConfigured); + + const cleanupAudioGraph = useCallback(() => { + processorRef.current?.disconnect(); + processorRef.current = null; + sourceRef.current?.disconnect(); + sourceRef.current = null; + void audioContextRef.current?.close(); + audioContextRef.current = null; + streamRef.current?.getTracks().forEach((track) => { + track.stop(); + }); + streamRef.current = null; + }, []); + + const transcribeChunk = useCallback(async (samples: Float32Array) => { + const activeProvider = providerRef.current; + if (!activeProvider) { + return; + } + + const gen = generationRef.current; + pendingTranscriptionsRef.current += 1; + setIsTranscribing(true); + + try { + const wavBlob = new Blob([encodeWav(samples, SAMPLE_RATE)], { + type: "audio/wav", + }); + const audio = await blobToBase64(wavBlob); + const response = await transcribeDictation({ + audio, + mimeType: "audio/wav", + provider: activeProvider, + }); + + if (gen !== generationRef.current) { + return; + } + + if (response.text.trim()) { + onTranscriptionRef.current(response.text); + } + } catch (error) { + onErrorRef.current(toErrorMessage(error)); + } finally { + pendingTranscriptionsRef.current -= 1; + if (pendingTranscriptionsRef.current === 0) { + setIsTranscribing(false); + } + } + }, []); + + const flushPendingSamples = useCallback(() => { + const chunks = samplesRef.current; + if (chunks.length === 0) { + return; + } + + const totalSamples = chunks.reduce( + (count, chunk) => count + chunk.length, + 0, + ); + const merged = new Float32Array(totalSamples); + let offset = 0; + for (const chunk of chunks) { + merged.set(chunk, offset); + offset += chunk.length; + } + + samplesRef.current = []; + void transcribeChunk(merged); + }, [transcribeChunk]); + + const stopRecording = useCallback( + (options?: { flushPending?: boolean }) => { + const flushPending = options?.flushPending ?? 
true; + + if (flushPending && samplesRef.current.length > 0) { + flushPendingSamples(); + } else if (!flushPending) { + samplesRef.current = []; + generationRef.current += 1; + } + + vadStateRef.current = createInitialVadState(); + cleanupAudioGraph(); + setIsRecording(false); + }, + [cleanupAudioGraph, flushPendingSamples], + ); + + const handleFrame = useCallback( + (samples: Float32Array) => { + const { decision, nextState } = advanceVadState( + vadStateRef.current, + getFrameRms(samples), + ); + vadStateRef.current = nextState; + + if (decision === "ignore") { + return; + } + + if (decision === "discard") { + samplesRef.current = []; + return; + } + + samplesRef.current.push(new Float32Array(samples)); + + if (decision === "append_and_flush") { + flushPendingSamples(); + } + }, + [flushPendingSamples], + ); + + const startRecording = useCallback(async () => { + if (!isEnabled || !provider) { + onError("Voice input is not configured"); + return; + } + + try { + const audioConstraints: MediaTrackConstraints = { + autoGainControl: true, + echoCancellation: true, + noiseSuppression: true, + }; + + if (preferredMicrophoneId) { + audioConstraints.deviceId = { exact: preferredMicrophoneId }; + } + + let stream: MediaStream; + try { + stream = await navigator.mediaDevices.getUserMedia({ + audio: audioConstraints, + }); + } catch (error) { + if ( + preferredMicrophoneId && + error instanceof DOMException && + (error.name === "NotFoundError" || + error.name === "OverconstrainedError") + ) { + delete audioConstraints.deviceId; + stream = await navigator.mediaDevices.getUserMedia({ + audio: audioConstraints, + }); + } else { + throw error; + } + } + + streamRef.current = stream; + samplesRef.current = []; + vadStateRef.current = createInitialVadState(); + + const context = new AudioContext({ sampleRate: SAMPLE_RATE }); + audioContextRef.current = context; + await context.resume(); + + const source = context.createMediaStreamSource(stream); + const processor = context.createScriptProcessor(1024, 1, 1); + const silence = context.createGain(); + silence.gain.value = 0; + + processor.onaudioprocess = (event) => { + const channel = event.inputBuffer.getChannelData(0); + handleFrame(new Float32Array(channel)); + }; + + source.connect(processor); + processor.connect(silence); + silence.connect(context.destination); + + sourceRef.current = source; + processorRef.current = processor; + setIsRecording(true); + } catch (error) { + stopRecording({ flushPending: false }); + onError(toErrorMessage(error)); + } + }, [ + handleFrame, + isEnabled, + onError, + preferredMicrophoneId, + provider, + stopRecording, + ]); + + const toggleRecording = useCallback(() => { + if (isRecording) { + stopRecording(); + } else { + void startRecording(); + } + }, [isRecording, startRecording, stopRecording]); + + useEffect( + () => () => { + stopRecording({ flushPending: false }); + }, + [stopRecording], + ); + + useEffect(() => { + if (!provider && isRecording) { + stopRecording({ flushPending: false }); + } + }, [isRecording, provider, stopRecording]); + + return { + isEnabled, + isRecording, + isTranscribing, + startRecording, + stopRecording, + toggleRecording, + }; +} diff --git a/ui/goose2/src/features/chat/hooks/useVoiceDictation.ts b/ui/goose2/src/features/chat/hooks/useVoiceDictation.ts new file mode 100644 index 000000000000..12fe9ce1f25f --- /dev/null +++ b/ui/goose2/src/features/chat/hooks/useVoiceDictation.ts @@ -0,0 +1,117 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { 
  getDictationConfig } from "@/shared/api/dictation";
+import type {
+  DictationProvider,
+  DictationProviderStatus,
+} from "@/shared/types/dictation";
+import type { ChatAttachmentDraft } from "@/shared/types/messages";
+import { useDictationRecorder } from "./useDictationRecorder";
+import { useVoiceInputPreferences } from "./useVoiceInputPreferences";
+import {
+  appendTranscribedText,
+  getAutoSubmitMatch,
+  getDefaultDictationProvider,
+  VOICE_DICTATION_CONFIG_EVENT,
+} from "../lib/voiceInput";
+
+interface UseVoiceDictationOptions {
+  text: string;
+  setText: (value: string) => void;
+  attachments: ChatAttachmentDraft[];
+  clearAttachments: () => void;
+  selectedPersonaId: string | null;
+  onSend: (
+    text: string,
+    personaId?: string,
+    attachments?: ChatAttachmentDraft[],
+  ) => void;
+  resetTextarea: () => void;
+}
+
+export function useVoiceDictation({
+  text,
+  setText,
+  attachments,
+  clearAttachments,
+  selectedPersonaId,
+  onSend,
+  resetTextarea,
+}: UseVoiceDictationOptions) {
+  const voicePrefs = useVoiceInputPreferences();
+  const [providerStatuses, setProviderStatuses] = useState<
+    Partial<Record<DictationProvider, DictationProviderStatus>>
+  >({});
+
+  const fetchDictationConfig = useCallback(() => {
+    getDictationConfig()
+      .then(setProviderStatuses)
+      .catch(() => {});
+  }, []);
+
+  useEffect(() => {
+    fetchDictationConfig();
+    window.addEventListener(VOICE_DICTATION_CONFIG_EVENT, fetchDictationConfig);
+    return () =>
+      window.removeEventListener(
+        VOICE_DICTATION_CONFIG_EVENT,
+        fetchDictationConfig,
+      );
+  }, [fetchDictationConfig]);
+
+  const activeVoiceProvider =
+    voicePrefs.selectedProvider ??
+    (voicePrefs.hasStoredProviderPreference
+      ? null
+      : getDefaultDictationProvider(providerStatuses));
+
+  const providerConfigured =
+    activeVoiceProvider != null &&
+    providerStatuses[activeVoiceProvider]?.configured === true;
+
+  const stopRecordingRef = useRef<
+    (options?: { flushPending?: boolean }) => void
+  >(() => {});
+
+  const handleTranscription = useCallback(
+    (fragment: string) => {
+      const match = getAutoSubmitMatch(fragment, voicePrefs.autoSubmitPhrases);
+      if (match) {
+        const merged = appendTranscribedText(text, match.textWithoutPhrase);
+        if (merged.trim()) {
+          stopRecordingRef.current({ flushPending: false });
+          onSend(
+            merged.trim(),
+            selectedPersonaId ?? undefined,
+            attachments.length > 0 ?
attachments : undefined, + ); + setText(""); + clearAttachments(); + resetTextarea(); + } + } else { + const merged = appendTranscribedText(text, fragment); + setText(merged); + } + }, + [ + attachments, + clearAttachments, + onSend, + resetTextarea, + selectedPersonaId, + setText, + text, + voicePrefs.autoSubmitPhrases, + ], + ); + + const handleVoiceError = useCallback((_message: string) => {}, []); + + const dictation = useDictationRecorder({ + provider: activeVoiceProvider, + providerConfigured, + preferredMicrophoneId: voicePrefs.preferredMicrophoneId, + onError: handleVoiceError, + onTranscription: handleTranscription, + }); + stopRecordingRef.current = dictation.stopRecording; + + return dictation; +} diff --git a/ui/goose2/src/features/chat/hooks/useVoiceInputPreferences.ts b/ui/goose2/src/features/chat/hooks/useVoiceInputPreferences.ts new file mode 100644 index 000000000000..602c125e58ee --- /dev/null +++ b/ui/goose2/src/features/chat/hooks/useVoiceInputPreferences.ts @@ -0,0 +1,161 @@ +import { useCallback, useEffect, useMemo, useState } from "react"; +import { + DISABLED_DICTATION_PROVIDER_STORAGE_VALUE, + DEFAULT_AUTO_SUBMIT_PHRASES_RAW, + VOICE_AUTO_SUBMIT_PHRASES_STORAGE_KEY, + VOICE_DICTATION_PREFERRED_MIC_STORAGE_KEY, + VOICE_DICTATION_PROVIDER_STORAGE_KEY, + normalizeDictationProvider, + parseAutoSubmitPhrases, +} from "../lib/voiceInput"; +import type { DictationProvider } from "@/shared/types/dictation"; + +const VOICE_INPUT_PREFERENCES_EVENT = "goose:voice-input-preferences"; + +function readStoredAutoSubmitPhrases() { + try { + return ( + window.localStorage.getItem(VOICE_AUTO_SUBMIT_PHRASES_STORAGE_KEY) ?? + DEFAULT_AUTO_SUBMIT_PHRASES_RAW + ); + } catch { + return DEFAULT_AUTO_SUBMIT_PHRASES_RAW; + } +} + +function readStoredDictationProvider(): DictationProvider | null { + try { + const storedValue = window.localStorage.getItem( + VOICE_DICTATION_PROVIDER_STORAGE_KEY, + ); + + if (storedValue === DISABLED_DICTATION_PROVIDER_STORAGE_VALUE) { + return null; + } + + return normalizeDictationProvider(storedValue); + } catch { + return null; + } +} + +function readHasStoredDictationProviderPreference() { + try { + return ( + window.localStorage.getItem(VOICE_DICTATION_PROVIDER_STORAGE_KEY) !== null + ); + } catch { + return false; + } +} + +function readStoredPreferredMicrophoneId() { + try { + return window.localStorage.getItem( + VOICE_DICTATION_PREFERRED_MIC_STORAGE_KEY, + ); + } catch { + return null; + } +} + +export function useVoiceInputPreferences() { + const [rawAutoSubmitPhrases, setRawAutoSubmitPhrasesState] = useState( + readStoredAutoSubmitPhrases, + ); + const [selectedProvider, setSelectedProviderState] = useState( + readStoredDictationProvider, + ); + const [hasStoredProviderPreference, setHasStoredProviderPreferenceState] = + useState(readHasStoredDictationProviderPreference); + const [preferredMicrophoneId, setPreferredMicrophoneIdState] = useState( + readStoredPreferredMicrophoneId, + ); + + useEffect(() => { + const syncFromStorage = () => { + setRawAutoSubmitPhrasesState(readStoredAutoSubmitPhrases()); + setSelectedProviderState(readStoredDictationProvider()); + setHasStoredProviderPreferenceState( + readHasStoredDictationProviderPreference(), + ); + setPreferredMicrophoneIdState(readStoredPreferredMicrophoneId()); + }; + + window.addEventListener("storage", syncFromStorage); + window.addEventListener( + VOICE_INPUT_PREFERENCES_EVENT, + syncFromStorage as EventListener, + ); + + return () => { + window.removeEventListener("storage", 
syncFromStorage); + window.removeEventListener( + VOICE_INPUT_PREFERENCES_EVENT, + syncFromStorage as EventListener, + ); + }; + }, []); + + const setRawAutoSubmitPhrases = useCallback((value: string) => { + setRawAutoSubmitPhrasesState(value); + + try { + window.localStorage.setItem(VOICE_AUTO_SUBMIT_PHRASES_STORAGE_KEY, value); + window.dispatchEvent(new Event(VOICE_INPUT_PREFERENCES_EVENT)); + } catch { + // localStorage may be unavailable + } + }, []); + + const setSelectedProvider = useCallback((value: DictationProvider | null) => { + setSelectedProviderState(value); + setHasStoredProviderPreferenceState(true); + + try { + window.localStorage.setItem( + VOICE_DICTATION_PROVIDER_STORAGE_KEY, + value ?? DISABLED_DICTATION_PROVIDER_STORAGE_VALUE, + ); + window.dispatchEvent(new Event(VOICE_INPUT_PREFERENCES_EVENT)); + } catch { + // localStorage may be unavailable + } + }, []); + + const setPreferredMicrophoneId = useCallback((value: string | null) => { + setPreferredMicrophoneIdState(value); + + try { + if (value) { + window.localStorage.setItem( + VOICE_DICTATION_PREFERRED_MIC_STORAGE_KEY, + value, + ); + } else { + window.localStorage.removeItem( + VOICE_DICTATION_PREFERRED_MIC_STORAGE_KEY, + ); + } + window.dispatchEvent(new Event(VOICE_INPUT_PREFERENCES_EVENT)); + } catch { + // localStorage may be unavailable + } + }, []); + + const autoSubmitPhrases = useMemo( + () => parseAutoSubmitPhrases(rawAutoSubmitPhrases), + [rawAutoSubmitPhrases], + ); + + return { + autoSubmitPhrases, + hasStoredProviderPreference, + preferredMicrophoneId, + rawAutoSubmitPhrases, + selectedProvider, + setPreferredMicrophoneId, + setRawAutoSubmitPhrases, + setSelectedProvider, + }; +} diff --git a/ui/goose2/src/features/chat/lib/dictationVad.test.ts b/ui/goose2/src/features/chat/lib/dictationVad.test.ts new file mode 100644 index 000000000000..89e96045c507 --- /dev/null +++ b/ui/goose2/src/features/chat/lib/dictationVad.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from "vitest"; +import { advanceVadState, createInitialVadState } from "./dictationVad"; + +function runFrames(levels: number[]) { + const decisions: string[] = []; + let state = createInitialVadState(); + + for (const level of levels) { + const result = advanceVadState(state, level); + decisions.push(result.decision); + state = result.nextState; + } + + return decisions; +} + +describe("dictationVad", () => { + it("ignores silence-only audio", () => { + expect(runFrames([0, 0, 0, 0])).toEqual([ + "ignore", + "ignore", + "ignore", + "ignore", + ]); + }); + + it("discards short noise bursts that never confirm speech", () => { + expect(runFrames([0.03, 0, 0, 0])).toEqual([ + "append", + "append", + "append", + "discard", + ]); + }); + + it("flushes a chunk after speech followed by trailing silence", () => { + expect(runFrames([0.03, 0.03, 0.03, 0, 0, 0, 0, 0, 0])).toContain( + "append_and_flush", + ); + }); + + it("returns to ignoring silence after a flush, ready for another chunk", () => { + const decisions = runFrames([ + 0.03, 0.03, 0.03, 0, 0, 0, 0, 0, 0, 0.03, 0.03, 0.03, 0, 0, 0, 0, 0, 0, + ]); + + expect( + decisions.filter((decision) => decision === "append_and_flush"), + ).toHaveLength(2); + }); +}); diff --git a/ui/goose2/src/features/chat/lib/dictationVad.ts b/ui/goose2/src/features/chat/lib/dictationVad.ts new file mode 100644 index 000000000000..0b4561e8cbae --- /dev/null +++ b/ui/goose2/src/features/chat/lib/dictationVad.ts @@ -0,0 +1,147 @@ +export type VadPhase = "idle" | "primed" | "speaking" | "trailing"; + +export 
type VadDecision = "ignore" | "append" | "append_and_flush" | "discard"; + +export interface VadState { + phase: VadPhase; + speechFrames: number; + silenceFrames: number; + framesInChunk: number; +} + +const SPEECH_RMS_THRESHOLD = 0.018; +const SPEECH_CONFIRMATION_FRAMES = 2; +const MAX_PRIMED_SILENCE_FRAMES = 2; +const TRAILING_SILENCE_FRAMES = 6; +const MIN_SPEECH_FRAMES = 3; + +export function createInitialVadState(): VadState { + return { + phase: "idle", + speechFrames: 0, + silenceFrames: 0, + framesInChunk: 0, + }; +} + +export function getFrameRms(samples: Float32Array): number { + let sum = 0; + for (let index = 0; index < samples.length; index += 1) { + const value = samples[index] ?? 0; + sum += value * value; + } + + return Math.sqrt(sum / Math.max(samples.length, 1)); +} + +export function advanceVadState( + state: VadState, + frameRms: number, +): { decision: VadDecision; nextState: VadState } { + const isSpeech = frameRms >= SPEECH_RMS_THRESHOLD; + + if (state.phase === "idle") { + if (!isSpeech) { + return { decision: "ignore" as const, nextState: state }; + } + + return { + decision: "append" as const, + nextState: { + phase: "primed" as const, + speechFrames: 1, + silenceFrames: 0, + framesInChunk: 1, + }, + }; + } + + if (state.phase === "primed") { + if (isSpeech) { + const speechFrames = state.speechFrames + 1; + return { + decision: "append" as const, + nextState: { + phase: + speechFrames >= SPEECH_CONFIRMATION_FRAMES ? "speaking" : "primed", + speechFrames, + silenceFrames: 0, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + const silenceFrames = state.silenceFrames + 1; + if (silenceFrames > MAX_PRIMED_SILENCE_FRAMES) { + return { + decision: "discard" as const, + nextState: createInitialVadState(), + }; + } + + return { + decision: "append" as const, + nextState: { + ...state, + silenceFrames, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + if (state.phase === "speaking") { + if (isSpeech) { + return { + decision: "append" as const, + nextState: { + phase: "speaking" as const, + speechFrames: state.speechFrames + 1, + silenceFrames: 0, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + return { + decision: "append" as const, + nextState: { + phase: "trailing" as const, + speechFrames: state.speechFrames, + silenceFrames: 1, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + if (isSpeech) { + return { + decision: "append" as const, + nextState: { + phase: "speaking" as const, + speechFrames: state.speechFrames + 1, + silenceFrames: 0, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + const silenceFrames = state.silenceFrames + 1; + if (silenceFrames < TRAILING_SILENCE_FRAMES) { + return { + decision: "append" as const, + nextState: { + ...state, + silenceFrames, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + return { + decision: + state.speechFrames >= MIN_SPEECH_FRAMES + ? 
("append_and_flush" as const) + : ("discard" as const), + nextState: createInitialVadState(), + }; +} diff --git a/ui/goose2/src/features/chat/lib/voiceInput.test.ts b/ui/goose2/src/features/chat/lib/voiceInput.test.ts new file mode 100644 index 000000000000..6ca3ae799d86 --- /dev/null +++ b/ui/goose2/src/features/chat/lib/voiceInput.test.ts @@ -0,0 +1,85 @@ +import { describe, expect, it } from "vitest"; +import { + appendTranscribedText, + getDefaultDictationProvider, + getAutoSubmitMatch, + parseAutoSubmitPhrases, + replaceTrailingTranscribedText, +} from "./voiceInput"; + +describe("voiceInput helpers", () => { + it("parses comma-separated auto-submit phrases", () => { + expect(parseAutoSubmitPhrases(" submit, Ship It ,submit ,, ")).toEqual([ + "submit", + "ship it", + ]); + }); + + it("appends dictated text without smashing words together", () => { + expect(appendTranscribedText("hello", "world")).toBe("hello world"); + expect(appendTranscribedText("hello ", "world")).toBe("hello world"); + expect(appendTranscribedText("hello", ", world")).toBe("hello, world"); + }); + + it("replaces only the trailing dictated segment", () => { + expect( + replaceTrailingTranscribedText( + "draft dictated text", + "dictated text", + "dictated text submit", + ), + ).toBe("draft dictated text submit"); + }); + + it("matches auto-submit phrases only at the end of dictated text", () => { + expect(getAutoSubmitMatch("please submit now", ["submit"])).toBeNull(); + expect(getAutoSubmitMatch("please SUBMIT.", ["submit"])).toEqual({ + matchedPhrase: "submit", + textWithoutPhrase: "please", + }); + }); + + it("picks the first configured dictation provider by priority", () => { + expect( + getDefaultDictationProvider({ + openai: { + configured: false, + description: "OpenAI", + usesProviderConfig: true, + availableModels: [], + }, + groq: { + configured: true, + description: "Groq", + usesProviderConfig: false, + availableModels: [], + }, + local: { + configured: true, + description: "Local", + usesProviderConfig: false, + availableModels: [], + }, + }), + ).toBe("groq"); + }); + + it("falls back to the first available provider when none are configured", () => { + expect( + getDefaultDictationProvider({ + elevenlabs: { + configured: false, + description: "ElevenLabs", + usesProviderConfig: false, + availableModels: [], + }, + local: { + configured: false, + description: "Local", + usesProviderConfig: false, + availableModels: [], + }, + }), + ).toBe("local"); + }); +}); diff --git a/ui/goose2/src/features/chat/lib/voiceInput.ts b/ui/goose2/src/features/chat/lib/voiceInput.ts new file mode 100644 index 000000000000..9997c451311a --- /dev/null +++ b/ui/goose2/src/features/chat/lib/voiceInput.ts @@ -0,0 +1,177 @@ +import type { + DictationProvider, + DictationProviderStatus, +} from "@/shared/types/dictation"; + +export const VOICE_AUTO_SUBMIT_PHRASES_STORAGE_KEY = + "goose:voice-auto-submit-phrases"; +export const VOICE_DICTATION_PROVIDER_STORAGE_KEY = + "goose:voice-dictation-provider"; +export const VOICE_DICTATION_PREFERRED_MIC_STORAGE_KEY = + "goose:voice-dictation-preferred-mic"; +export const VOICE_DICTATION_CONFIG_EVENT = "goose:voice-dictation-config"; +export const DISABLED_DICTATION_PROVIDER_STORAGE_VALUE = "__disabled__"; + +export const DEFAULT_AUTO_SUBMIT_PHRASES_RAW = "submit"; + +const TRAILING_PUNCTUATION_REGEX = /[\s"'`.,!?;:)\]}]+$/u; + +function normalizePhrase(value: string): string { + return value + .toLowerCase() + .replace(/\s+/g, " ") + .trim() + .replace(TRAILING_PUNCTUATION_REGEX, "") + 
.trim(); +} + +export function parseAutoSubmitPhrases(rawValue: string | null | undefined) { + if (!rawValue) { + return []; + } + + return Array.from( + new Set( + rawValue + .split(",") + .map((value) => normalizePhrase(value)) + .filter(Boolean), + ), + ); +} + +export function normalizeDictationProvider( + value: string | null | undefined, +): DictationProvider | null { + if ( + value === "openai" || + value === "groq" || + value === "elevenlabs" || + value === "local" + ) { + return value; + } + + return null; +} + +export function getDefaultDictationProvider( + providerStatuses: Partial>, +): DictationProvider | null { + const configuredProviderPriority: DictationProvider[] = [ + "openai", + "groq", + "elevenlabs", + "local", + ]; + const fallbackProviderPriority: DictationProvider[] = [ + "local", + "openai", + "groq", + "elevenlabs", + ]; + + for (const provider of configuredProviderPriority) { + if (providerStatuses[provider]?.configured) { + return provider; + } + } + + for (const provider of fallbackProviderPriority) { + if (providerStatuses[provider]) { + return provider; + } + } + + return null; +} + +export function appendTranscribedText(baseText: string, fragment: string) { + const normalizedFragment = fragment.replace(/\s+/g, " ").trim(); + if (!normalizedFragment) { + return baseText; + } + + if (!baseText.trim()) { + return normalizedFragment; + } + + if (/[\s([{/-]$/.test(baseText) || /^[,.;!?)]/.test(normalizedFragment)) { + return `${baseText}${normalizedFragment}`; + } + + return `${baseText} ${normalizedFragment}`; +} + +export function replaceTrailingTranscribedText( + fullText: string, + previousTranscribedText: string, + nextTranscribedText: string, +) { + if (!previousTranscribedText) { + return appendTranscribedText(fullText, nextTranscribedText); + } + + if (fullText.endsWith(previousTranscribedText)) { + return appendTranscribedText( + fullText.slice(0, -previousTranscribedText.length), + nextTranscribedText, + ); + } + + const trimmedPreviousText = previousTranscribedText.trim(); + if (trimmedPreviousText && fullText.endsWith(trimmedPreviousText)) { + return appendTranscribedText( + fullText.slice(0, -trimmedPreviousText.length), + nextTranscribedText, + ); + } + + return appendTranscribedText(fullText, nextTranscribedText); +} + +export function getAutoSubmitMatch( + transcribedText: string, + autoSubmitPhrases: string[], +) { + const normalizedTranscribedText = normalizePhrase(transcribedText); + if (!normalizedTranscribedText) { + return null; + } + + const sortedPhrases = [...autoSubmitPhrases].sort( + (left, right) => right.length - left.length, + ); + + for (const phrase of sortedPhrases) { + if (!normalizedTranscribedText.endsWith(phrase)) { + continue; + } + + const phraseStartIndex = normalizedTranscribedText.length - phrase.length; + if ( + phraseStartIndex > 0 && + normalizedTranscribedText[phraseStartIndex - 1] !== " " + ) { + continue; + } + + const trimmedText = transcribedText.replace(TRAILING_PUNCTUATION_REGEX, ""); + const textWithoutPhrase = trimmedText.slice(0, -phrase.length).trimEnd(); + + return { + matchedPhrase: phrase, + textWithoutPhrase, + }; + } + + return null; +} + +export function notifyVoiceDictationConfigChanged() { + try { + window.dispatchEvent(new Event(VOICE_DICTATION_CONFIG_EVENT)); + } catch { + // no-op + } +} diff --git a/ui/goose2/src/features/chat/ui/ChatInput.tsx b/ui/goose2/src/features/chat/ui/ChatInput.tsx index 9b40f2b768f3..8013c6f7eb02 100644 --- a/ui/goose2/src/features/chat/ui/ChatInput.tsx +++ 
b/ui/goose2/src/features/chat/ui/ChatInput.tsx @@ -22,6 +22,7 @@ import { } from "../hooks/useChatInputAttachments"; import type { ModelOption } from "../types"; import { ChatInputAttachments } from "./ChatInputAttachments"; +import { useVoiceDictation } from "../hooks/useVoiceDictation"; export interface ProjectOption { id: string; @@ -121,6 +122,22 @@ export function ChatInput({ clearAttachments, } = useChatInputAttachments(); + const resetTextarea = useCallback(() => { + if (textareaRef.current) { + textareaRef.current.style.height = "auto"; + } + }, []); + + const dictation = useVoiceDictation({ + text, + setText, + attachments, + clearAttachments, + selectedPersonaId, + onSend, + resetTextarea, + }); + const activePersona = useMemo( () => personas.find((persona) => persona.id === selectedPersonaId) ?? null, [personas, selectedPersonaId], @@ -178,6 +195,14 @@ export function ChatInput({ useEffect(() => textareaRef.current?.focus(), []); const handleSend = useCallback(() => { + // If recording, stop and flush — the transcription callback will + // append text and may auto-submit. Don't send the current text yet + // because the final transcription hasn't arrived. + if (dictation.isRecording || dictation.isTranscribing) { + dictation.stopRecording(); + return; + } + if (!canSend) { return; } @@ -196,6 +221,7 @@ export function ChatInput({ attachments, canSend, clearAttachments, + dictation, onSend, selectedPersonaId, setText, @@ -408,7 +434,13 @@ export function ChatInput({ onChange={handleInput} onKeyDown={handleKeyDown} onPaste={handlePaste} - placeholder={effectivePlaceholder} + placeholder={ + dictation.isRecording + ? t("toolbar.voiceInputRecording") + : dictation.isTranscribing + ? t("toolbar.voiceInputTranscribing") + : effectivePlaceholder + } disabled={disabled} rows={1} className="mb-3 min-h-[36px] max-h-[200px] w-full resize-none bg-transparent px-1 text-[14px] leading-relaxed text-foreground placeholder:font-light placeholder:text-muted-foreground/60 focus:outline-none focus-visible:ring-0 focus-visible:ring-offset-0 disabled:opacity-60" @@ -447,6 +479,10 @@ export function ChatInput({ onSend={handleSend} onStop={onStop} isCompact={isCompact} + voiceEnabled={dictation.isEnabled} + voiceRecording={dictation.isRecording} + voiceTranscribing={dictation.isTranscribing} + onVoiceToggle={dictation.toggleRecording} /> diff --git a/ui/goose2/src/features/chat/ui/ChatInputToolbar.tsx b/ui/goose2/src/features/chat/ui/ChatInputToolbar.tsx index 3e25b8f084ce..e5b553569a93 100644 --- a/ui/goose2/src/features/chat/ui/ChatInputToolbar.tsx +++ b/ui/goose2/src/features/chat/ui/ChatInputToolbar.tsx @@ -90,6 +90,11 @@ interface ChatInputToolbarProps { onAttachFiles?: () => void; onAttachFolders?: () => void; disabled?: boolean; + // Voice + voiceEnabled?: boolean; + voiceRecording?: boolean; + voiceTranscribing?: boolean; + onVoiceToggle?: () => void; // Layout isCompact: boolean; } @@ -124,6 +129,10 @@ export function ChatInputToolbar({ onAttachFiles, onAttachFolders, disabled = false, + voiceEnabled = false, + voiceRecording = false, + voiceTranscribing = false, + onVoiceToggle, isCompact, }: ChatInputToolbarProps) { const { t } = useTranslation("chat"); @@ -384,14 +393,32 @@ export function ChatInputToolbar({ type="button" variant="ghost" size="icon-sm" - disabled - aria-label={t("toolbar.voiceInputSoon")} + disabled={!voiceEnabled || disabled} + onClick={onVoiceToggle} + aria-label={ + voiceRecording + ? 
t("toolbar.voiceInputRecording") + : t("toolbar.voiceInput") + } + className={cn( + voiceRecording && + "bg-destructive/10 text-destructive hover:bg-destructive/20 hover:text-destructive", + voiceTranscribing && "animate-pulse", + )} > - {t("toolbar.voiceInputSoon")} + + {!voiceEnabled + ? t("toolbar.voiceInputDisabled") + : voiceRecording + ? t("toolbar.voiceInputRecording") + : voiceTranscribing + ? t("toolbar.voiceInputTranscribing") + : t("toolbar.voiceInput")} + diff --git a/ui/goose2/src/features/settings/ui/SettingsModal.tsx b/ui/goose2/src/features/settings/ui/SettingsModal.tsx index 65ab6b6aff76..03400ccef214 100644 --- a/ui/goose2/src/features/settings/ui/SettingsModal.tsx +++ b/ui/goose2/src/features/settings/ui/SettingsModal.tsx @@ -21,6 +21,7 @@ import { SelectValue, } from "@/shared/ui/select"; import { + Mic, Palette, Settings2, FolderKanban, @@ -34,6 +35,7 @@ import { AppearanceSettings } from "./AppearanceSettings"; import { DoctorSettings } from "./DoctorSettings"; import { ProvidersSettings } from "./ProvidersSettings"; import { ExtensionsSettings } from "@/features/extensions/ui/ExtensionsSettings"; +import { VoiceInputSettings } from "./VoiceInputSettings"; import { listArchivedProjects, restoreProject, @@ -50,6 +52,7 @@ const NAV_ITEMS = [ { id: "appearance", labelKey: "nav.appearance", icon: Palette }, { id: "providers", labelKey: "nav.providers", icon: IconPlug }, { id: "extensions", labelKey: "nav.extensions", icon: IconPuzzle }, + { id: "voice", labelKey: "nav.voice", icon: Mic }, { id: "general", labelKey: "nav.general", icon: Settings2 }, { id: "projects", labelKey: "nav.projects", icon: FolderKanban }, { id: "chats", labelKey: "nav.chats", icon: MessageSquare }, @@ -241,6 +244,7 @@ export function SettingsModal({ {activeSection === "appearance" && } {activeSection === "providers" && } {activeSection === "extensions" && } + {activeSection === "voice" && } {activeSection === "doctor" && } {activeSection === "general" && (
diff --git a/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx
new file mode 100644
index 000000000000..8ccc14908ecb
--- /dev/null
+++ b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx
@@ -0,0 +1,465 @@
+import { useCallback, useEffect, useMemo, useState } from "react";
+import { useTranslation } from "react-i18next";
+import {
+  deleteDictationProviderSecret,
+  getDictationConfig,
+  saveDictationModelSelection,
+  saveDictationProviderSecret,
+} from "@/shared/api/dictation";
+import {
+  notifyVoiceDictationConfigChanged,
+  getDefaultDictationProvider,
+} from "@/features/chat/lib/voiceInput";
+import { useVoiceInputPreferences } from "@/features/chat/hooks/useVoiceInputPreferences";
+import type {
+  DictationProvider,
+  DictationProviderStatus,
+} from "@/shared/types/dictation";
+import { useAudioDevices } from "@/shared/ui/ai-elements/mic-selector";
+import { Button } from "@/shared/ui/button";
+import { Input } from "@/shared/ui/input";
+import {
+  Select,
+  SelectContent,
+  SelectItem,
+  SelectTrigger,
+  SelectValue,
+} from "@/shared/ui/select";
+
+const DISABLED_PROVIDER = "__disabled__";
+
+export function VoiceInputSettings() {
+  const { t } = useTranslation(["settings", "chat", "common"]);
+  const {
+    hasStoredProviderPreference,
+    preferredMicrophoneId,
+    rawAutoSubmitPhrases,
+    selectedProvider,
+    setPreferredMicrophoneId,
+    setRawAutoSubmitPhrases,
+    setSelectedProvider,
+  } = useVoiceInputPreferences();
+  const [providerStatuses, setProviderStatuses] = useState<
+    Record<DictationProvider, DictationProviderStatus>
+  >({} as Record<DictationProvider, DictationProviderStatus>);
+  const [apiKeyInput, setApiKeyInput] = useState("");
+  const [isEditingApiKey, setIsEditingApiKey] = useState(false);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+  const {
+    devices,
+    error: devicesError,
+    hasPermission,
+    loadDevices,
+    loading: loadingDevices,
+  } = useAudioDevices();
+  const isMicrophoneSupported =
+    typeof navigator !== "undefined" && !!navigator.mediaDevices;
+  const permissionStatus = hasPermission ? "authorized" : "not_determined";
+  const requestPermission = loadDevices;
+
+  const refreshConfig = useCallback(async () => {
+    const nextConfig = await getDictationConfig();
+    setProviderStatuses(nextConfig);
+
+    if (!hasStoredProviderPreference) {
+      const defaultProvider = getDefaultDictationProvider(nextConfig);
+      if (defaultProvider) {
+        setSelectedProvider(defaultProvider);
+      }
+      return;
+    }
+
+    if (!selectedProvider) {
+      return;
+    }
+
+    if (!nextConfig[selectedProvider]) {
+      setSelectedProvider(null);
+    }
+  }, [hasStoredProviderPreference, selectedProvider, setSelectedProvider]);
+
+  useEffect(() => {
+    const load = async () => {
+      setLoading(true);
+      setError(null);
+
+      try {
+        await refreshConfig();
+      } catch (caughtError) {
+        setError(
+          caughtError instanceof Error
+            ? caughtError.message
+            : t("general.voiceInput.loadError"),
+        );
+      } finally {
+        setLoading(false);
+      }
+    };
+
+    void load();
+  }, [refreshConfig, t]);
+
+  const selectedStatus = selectedProvider
+    ? providerStatuses[selectedProvider]
+    : null;
+
+  const providerOptions = useMemo(
+    () =>
+      Object.entries(providerStatuses) as Array<
+        [DictationProvider, DictationProviderStatus]
+      >,
+    [providerStatuses],
+  );
+
+  const currentModelValue =
+    selectedStatus?.selectedModel ?? selectedStatus?.defaultModel ??
""; + + const saveApiKey = useCallback(async () => { + if (!selectedProvider) { + return; + } + + setError(null); + try { + await saveDictationProviderSecret( + selectedProvider, + apiKeyInput, + selectedStatus?.configKey ?? undefined, + ); + setApiKeyInput(""); + setIsEditingApiKey(false); + await refreshConfig(); + notifyVoiceDictationConfigChanged(); + } catch (caughtError) { + setError( + caughtError instanceof Error + ? caughtError.message + : t("general.voiceInput.saveError"), + ); + } + }, [apiKeyInput, refreshConfig, selectedProvider, selectedStatus, t]); + + const removeApiKey = useCallback(async () => { + if (!selectedProvider) { + return; + } + + setError(null); + try { + await deleteDictationProviderSecret( + selectedProvider, + selectedStatus?.configKey ?? undefined, + ); + setApiKeyInput(""); + setIsEditingApiKey(false); + await refreshConfig(); + notifyVoiceDictationConfigChanged(); + } catch (caughtError) { + setError( + caughtError instanceof Error + ? caughtError.message + : t("general.voiceInput.deleteError"), + ); + } + }, [refreshConfig, selectedProvider, selectedStatus, t]); + + const handleModelChange = useCallback( + async (modelId: string) => { + if (!selectedProvider) { + return; + } + + setError(null); + try { + await saveDictationModelSelection(selectedProvider, modelId); + await refreshConfig(); + notifyVoiceDictationConfigChanged(); + } catch (caughtError) { + setError( + caughtError instanceof Error + ? caughtError.message + : t("general.voiceInput.saveError"), + ); + } + }, + [refreshConfig, selectedProvider, t], + ); + + const selectedMicrophoneLabel = useMemo(() => { + if (!preferredMicrophoneId) { + return t("general.voiceInput.systemMicrophone"); + } + + return ( + devices.find((device) => device.deviceId === preferredMicrophoneId) + ?.label || t("general.voiceInput.systemMicrophone") + ); + }, [devices, preferredMicrophoneId, t]); + + if (loading) { + return ( +
+      <div>
+        <h3>{t("general.voiceInput.label")}</h3>
+        <p>{t("common:labels.loading")}</p>
+      </div>
+    );
+  }
+
+  return (
+    <div>
+      {/* Markup reconstructed; layout classes and a few literal labels (marked "…") were lost in the source. */}
+      <div>
+        <h3>{t("general.voiceInput.label")}</h3>
+        <p>{t("general.voiceInput.description")}</p>
+      </div>
+
+      <div>
+        <label>{t("general.voiceInput.providerLabel")}</label>
+        <Select
+          value={selectedProvider ?? DISABLED_PROVIDER}
+          onValueChange={(value) =>
+            setSelectedProvider(
+              value === DISABLED_PROVIDER ? null : (value as DictationProvider),
+            )
+          }
+        >
+          <SelectTrigger>
+            <SelectValue />
+          </SelectTrigger>
+          <SelectContent>
+            <SelectItem value={DISABLED_PROVIDER}>{/* … */}</SelectItem>
+            {providerOptions.map(([provider, status]) => (
+              <SelectItem key={provider} value={provider}>
+                {status.description}
+              </SelectItem>
+            ))}
+          </SelectContent>
+        </Select>
+      </div>
+
+      {selectedStatus ? (
+        <>
+          {!selectedStatus.usesProviderConfig &&
+          selectedProvider !== "local" ? (
+            <div>
+              {isEditingApiKey ? (
+                <>
+                  <div>
+                    <h4>{t("general.voiceInput.apiKeyLabel")}</h4>
+                    <p>{t("general.voiceInput.apiKeyDescription")}</p>
+                  </div>
+                  <Input
+                    type="password"
+                    value={apiKeyInput}
+                    onChange={(event) => setApiKeyInput(event.target.value)}
+                    placeholder={t("general.voiceInput.apiKeyPlaceholder")}
+                    className="max-w-sm"
+                  />
+                  <div>
+                    <Button onClick={() => void saveApiKey()}>{/* … */}</Button>
+                    <Button
+                      variant="ghost"
+                      onClick={() => {
+                        setApiKeyInput("");
+                        setIsEditingApiKey(false);
+                      }}
+                    >
+                      {/* … */}
+                    </Button>
+                  </div>
+                </>
+              ) : (
+                <div>
+                  <div>
+                    <h4>{t("general.voiceInput.apiKeyLabel")}</h4>
+                    <p>
+                      {selectedStatus.configured
+                        ? t("general.voiceInput.apiKeyConfigured")
+                        : t("general.voiceInput.apiKeyDescription")}
+                    </p>
+                  </div>
+                  <div>
+                    <Button onClick={() => setIsEditingApiKey(true)}>
+                      {/* … */}
+                    </Button>
+                    {selectedStatus.configured ? (
+                      <Button
+                        variant="ghost"
+                        onClick={() => void removeApiKey()}
+                      >
+                        {/* … */}
+                      </Button>
+                    ) : null}
+                  </div>
+                </div>
+              )}
+            </div>
+          ) : null}
+
+          {selectedProvider === "local" ? (
+            <div>
+              <h4>{t("general.voiceInput.localModelLabel")}</h4>
+              <p>{t("general.voiceInput.localModelUnavailable")}</p>
+            </div>
+          ) : (selectedStatus.availableModels ?? []).length > 0 ? (
+            <div>
+              <label>{t("general.voiceInput.modelLabel")}</label>
+              <Select
+                value={currentModelValue}
+                onValueChange={(value) => void handleModelChange(value)}
+              >
+                <SelectTrigger>
+                  <SelectValue />
+                </SelectTrigger>
+                <SelectContent>
+                  {(selectedStatus.availableModels ?? []).map((model) => (
+                    <SelectItem key={model.id} value={model.id}>
+                      {model.label}
+                    </SelectItem>
+                  ))}
+                </SelectContent>
+              </Select>
+              <p>
+                {(selectedStatus.availableModels ?? []).find(
+                  (model) => model.id === currentModelValue,
+                )?.description ?? ""}
+              </p>
+            </div>
+          ) : null}
+        </>
+      ) : null}
+
+      <div>
+        <div>
+          <h4>{t("general.voiceInput.microphoneLabel")}</h4>
+          <p>
+            {isMicrophoneSupported
+              ? t("general.voiceInput.microphoneDescription")
+              : t("general.voiceInput.microphoneUnavailable")}
+          </p>
+        </div>
+        {isMicrophoneSupported && !hasPermission ? (
+          <Button onClick={() => void requestPermission()}>{/* … */}</Button>
+        ) : null}
+
+        {!devicesError &&
+        !hasPermission &&
+        permissionStatus === "not_determined" ? (
+          <p>{t("general.voiceInput.microphoneAccessPrompt")}</p>
+        ) : null}
+
+        {devicesError ? <p>{devicesError}</p> : null}
+
+        {isMicrophoneSupported && hasPermission ? (
+          <Select
+            value={preferredMicrophoneId ?? ""}
+            onValueChange={(value) => setPreferredMicrophoneId(value || null)}
+            disabled={loadingDevices}
+          >
+            <SelectTrigger>
+              <SelectValue placeholder={selectedMicrophoneLabel} />
+            </SelectTrigger>
+            <SelectContent>
+              {devices.map((device) => (
+                <SelectItem key={device.deviceId} value={device.deviceId}>
+                  {device.label || t("general.voiceInput.systemMicrophone")}
+                </SelectItem>
+              ))}
+            </SelectContent>
+          </Select>
+        ) : null}
+      </div>
+
+      <div>
+        <p>{t("general.voiceInput.autoSubmitDescription")}</p>
+        <Input
+          value={rawAutoSubmitPhrases}
+          onChange={(event) => setRawAutoSubmitPhrases(event.target.value)}
+          placeholder={t("general.voiceInput.placeholder")}
+          className="max-w-sm"
+        />
+      </div>
+
+      {error ? <p>{error}</p> : null}
+    </div>
+  );
+}
+ ); +} diff --git a/ui/goose2/src/shared/api/dictation.ts b/ui/goose2/src/shared/api/dictation.ts new file mode 100644 index 000000000000..4473225b2684 --- /dev/null +++ b/ui/goose2/src/shared/api/dictation.ts @@ -0,0 +1,100 @@ +import { invoke } from "@tauri-apps/api/core"; +import type { + DictationDownloadProgress, + DictationProvider, + DictationProviderStatus, + DictationTranscribeResponse, + MicrophonePermissionStatus, + WhisperModelStatus, +} from "@/shared/types/dictation"; + +export async function getDictationConfig(): Promise< + Record +> { + return invoke("get_dictation_config"); +} + +export async function transcribeDictation(request: { + audio: string; + mimeType: string; + provider: DictationProvider; +}): Promise { + return invoke("transcribe_dictation", { + request: { + audio: request.audio, + mimeType: request.mimeType, + provider: request.provider, + }, + }); +} + +export async function saveDictationModelSelection( + provider: DictationProvider, + modelId: string, +): Promise { + return invoke("save_dictation_model_selection", { provider, modelId }); +} + +export async function saveDictationProviderSecret( + _provider: DictationProvider, + value: string, + configKey?: string, +): Promise { + if (!configKey) { + throw new Error("No config key for this provider"); + } + return invoke("save_provider_field", { key: configKey, value }); +} + +export async function deleteDictationProviderSecret( + provider: DictationProvider, + _configKey?: string, +): Promise { + const providerIdMap: Record = { + groq: "dictation_groq", + elevenlabs: "dictation_elevenlabs", + }; + const providerId = providerIdMap[provider]; + if (!providerId) { + throw new Error("Cannot delete secrets for this provider"); + } + return invoke("delete_provider_config", { providerId }); +} + +export async function listDictationLocalModels(): Promise< + WhisperModelStatus[] +> { + return invoke("list_dictation_local_models"); +} + +export async function downloadDictationLocalModel( + modelId: string, +): Promise { + return invoke("download_dictation_local_model", { modelId }); +} + +export async function getDictationLocalModelDownloadProgress( + modelId: string, +): Promise { + return invoke("get_dictation_local_model_download_progress", { modelId }); +} + +export async function cancelDictationLocalModelDownload( + modelId: string, +): Promise { + return invoke("cancel_dictation_local_model_download", { modelId }); +} + +export async function deleteDictationLocalModel( + modelId: string, +): Promise { + return invoke("delete_dictation_local_model", { modelId }); +} + +export async function getMicrophonePermissionStatus(): Promise { + return invoke("get_microphone_permission_status"); +} + +export async function requestMicrophonePermission(): Promise { + return invoke("request_microphone_permission"); +} diff --git a/ui/goose2/src/shared/i18n/locales/en/chat.json b/ui/goose2/src/shared/i18n/locales/en/chat.json index efe6776e3d87..424007cc8c5c 100644 --- a/ui/goose2/src/shared/i18n/locales/en/chat.json +++ b/ui/goose2/src/shared/i18n/locales/en/chat.json @@ -169,7 +169,11 @@ "selectProject": "Select project", "sendMessage": "Send message", "stopGeneration": "Stop generation", - "voiceInputSoon": "Voice input (coming soon)" + "voiceInput": "Voice dictation", + "voiceInputDisabled": "Configure a voice provider in Settings to enable dictation", + "voiceInputRecording": "Listening...", + "voiceInputTranscribing": "Transcribing...", + "voiceInputAutoSubmitHint": "Say \"submit\" to send" }, "tools": { "fileNotFound": 
"File not found: {{path}}", diff --git a/ui/goose2/src/shared/i18n/locales/en/settings.json b/ui/goose2/src/shared/i18n/locales/en/settings.json index be55f4766a1d..e4c15409aa29 100644 --- a/ui/goose2/src/shared/i18n/locales/en/settings.json +++ b/ui/goose2/src/shared/i18n/locales/en/settings.json @@ -124,7 +124,48 @@ "spanish": "Spanish", "system": "System default ({{language}})" }, - "title": "General" + "title": "General", + "voiceInput": { + "label": "Voice Input", + "description": "Configure voice dictation for hands-free input.", + "providerLabel": "Transcription Provider", + "disabled": "Disabled", + "active": "Active", + "notConfiguredSuffix": "(not configured)", + "placeholder": "Select a provider", + "modelLabel": "Model", + "apiKeyLabel": "API Key", + "apiKeyDescription": "Enter your API key for this provider.", + "apiKeyPlaceholder": "sk-...", + "apiKeyConfigured": "API key configured", + "addApiKey": "Add API key", + "updateApiKey": "Update API key", + "removeApiKey": "Remove API key", + "localModelLabel": "Local Whisper Model", + "localModelUnavailable": "Local model download is not yet available. Use the Goose CLI to download a Whisper model first.", + "download": "Download", + "recommended": "Recommended", + "microphoneLabel": "Microphone", + "microphoneDescription": "Choose which microphone to use for voice input.", + "microphoneUnavailable": "Microphone access is not available in this environment.", + "microphoneAccessPrompt": "Click \"Grant access\" to allow microphone use.", + "grantMicrophone": "Grant access", + "systemMicrophone": "System default", + "unknownMicrophone": "Unknown microphone", + "autoSubmitLabel": "Auto-submit Phrases", + "autoSubmitDescription": "Comma-separated words that trigger automatic send (e.g. \"submit\").", + "providers": { + "openai": "OpenAI Whisper", + "groq": "Groq", + "elevenlabs": "ElevenLabs", + "local": "Local Whisper" + }, + "providerSetupHint": "This provider uses your main provider config. Check {{settingsPath}} to configure it.", + "downloadProgress": "Downloading... {{percent}}%", + "loadError": "Failed to load voice settings.", + "saveError": "Failed to save.", + "deleteError": "Failed to delete." 
+ } }, "nav": { "about": "About", @@ -134,7 +175,8 @@ "general": "General", "projects": "Projects", "extensions": "Extensions", - "providers": "Providers" + "providers": "Providers", + "voice": "Voice" }, "projects": { "description": "Manage your projects.", diff --git a/ui/goose2/src/shared/i18n/locales/es/chat.json b/ui/goose2/src/shared/i18n/locales/es/chat.json index 3a5760189e23..5bd93d8a560d 100644 --- a/ui/goose2/src/shared/i18n/locales/es/chat.json +++ b/ui/goose2/src/shared/i18n/locales/es/chat.json @@ -169,7 +169,11 @@ "selectProject": "Seleccionar proyecto", "sendMessage": "Enviar mensaje", "stopGeneration": "Detener generación", - "voiceInputSoon": "Entrada de voz (pronto)" + "voiceInput": "Dictado por voz", + "voiceInputDisabled": "Configura un proveedor de voz en Ajustes para activar el dictado", + "voiceInputRecording": "Escuchando...", + "voiceInputTranscribing": "Transcribiendo...", + "voiceInputAutoSubmitHint": "Di \"enviar\" para enviar" }, "tools": { "fileNotFound": "Archivo no encontrado: {{path}}", diff --git a/ui/goose2/src/shared/i18n/locales/es/settings.json b/ui/goose2/src/shared/i18n/locales/es/settings.json index 8b2b85236ece..33bef38d3078 100644 --- a/ui/goose2/src/shared/i18n/locales/es/settings.json +++ b/ui/goose2/src/shared/i18n/locales/es/settings.json @@ -124,7 +124,48 @@ "spanish": "Español", "system": "Predeterminado del sistema ({{language}})" }, - "title": "General" + "title": "General", + "voiceInput": { + "label": "Entrada de voz", + "description": "Configura el dictado por voz para entrada manos libres.", + "providerLabel": "Proveedor de transcripción", + "disabled": "Desactivado", + "active": "Activo", + "notConfiguredSuffix": "(no configurado)", + "placeholder": "Selecciona un proveedor", + "modelLabel": "Modelo", + "apiKeyLabel": "Clave API", + "apiKeyDescription": "Ingresa tu clave API para este proveedor.", + "apiKeyPlaceholder": "sk-...", + "apiKeyConfigured": "Clave API configurada", + "addApiKey": "Agregar clave API", + "updateApiKey": "Actualizar clave API", + "removeApiKey": "Eliminar clave API", + "localModelLabel": "Modelo Whisper local", + "localModelUnavailable": "La descarga de modelos locales aún no está disponible. Usa la CLI de Goose para descargar un modelo Whisper primero.", + "download": "Descargar", + "recommended": "Recomendado", + "microphoneLabel": "Micrófono", + "microphoneDescription": "Elige qué micrófono usar para la entrada de voz.", + "microphoneUnavailable": "El acceso al micrófono no está disponible en este entorno.", + "microphoneAccessPrompt": "Haz clic en \"Permitir acceso\" para usar el micrófono.", + "grantMicrophone": "Permitir acceso", + "systemMicrophone": "Predeterminado del sistema", + "unknownMicrophone": "Micrófono desconocido", + "autoSubmitLabel": "Frases de envío automático", + "autoSubmitDescription": "Palabras separadas por coma que activan el envío automático (ej. \"enviar\").", + "providers": { + "openai": "OpenAI Whisper", + "groq": "Groq", + "elevenlabs": "ElevenLabs", + "local": "Whisper local" + }, + "providerSetupHint": "Este proveedor usa tu configuración principal. Revisa {{settingsPath}} para configurarlo.", + "downloadProgress": "Descargando... {{percent}}%", + "loadError": "Error al cargar ajustes de voz.", + "saveError": "Error al guardar.", + "deleteError": "Error al eliminar." 
+ } }, "nav": { "about": "Acerca de", @@ -134,7 +175,8 @@ "general": "General", "projects": "Proyectos", "extensions": "Extensiones", - "providers": "Proveedores" + "providers": "Proveedores", + "voice": "Voz" }, "projects": { "description": "Administra tus proyectos.", diff --git a/ui/goose2/src/shared/types/dictation.ts b/ui/goose2/src/shared/types/dictation.ts new file mode 100644 index 000000000000..acf617b3fec5 --- /dev/null +++ b/ui/goose2/src/shared/types/dictation.ts @@ -0,0 +1,51 @@ +export type DictationProvider = "openai" | "groq" | "elevenlabs" | "local"; + +export interface DictationModelOption { + id: string; + label: string; + description: string; +} + +export interface DictationProviderStatus { + configured: boolean; + host?: string | null; + description: string; + usesProviderConfig: boolean; + settingsPath?: string | null; + configKey?: string | null; + modelConfigKey?: string | null; + defaultModel?: string | null; + selectedModel?: string | null; + availableModels: DictationModelOption[]; +} + +export interface DictationTranscribeResponse { + text: string; +} + +export type MicrophonePermissionStatus = + | "not_determined" + | "authorized" + | "denied" + | "restricted" + | "unsupported"; + +export interface WhisperModelStatus { + id: string; + sizeMb: number; + url: string; + description: string; + downloaded: boolean; + recommended: boolean; +} + +export interface DictationDownloadProgress { + modelId: string; + status: string; + bytesDownloaded: number; + totalBytes: number; + progressPercent: number; + speedBps?: number | null; + etaSeconds?: number | null; + error?: string | null; +} From 4c32ee2fb188f62bff8eb1b9fa3fbf82d80981bc Mon Sep 17 00:00:00 2001 From: tulsi Date: Thu, 16 Apr 2026 11:08:56 -0700 Subject: [PATCH 02/30] chore(goose-acp): regenerate acp-schema.json with dictation methods Picks up DictationTranscribeRequest/Response, DictationConfigRequest/Response, and DictationProviderStatusEntry entries. Required for the @aaif/goose-sdk TypeScript generator in ui/sdk to see the new methods. --- crates/goose-acp/acp-schema.json | 178 +++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/crates/goose-acp/acp-schema.json b/crates/goose-acp/acp-schema.json index 0f0db1759a37..547b93a6f009 100644 --- a/crates/goose-acp/acp-schema.json +++ b/crates/goose-acp/acp-schema.json @@ -607,6 +607,150 @@ "x-side": "agent", "x-method": "_goose/session/unarchive" }, + "DictationTranscribeRequest": { + "type": "object", + "properties": { + "audio": { + "type": "string", + "description": "Base64-encoded audio data" + }, + "mimeType": { + "type": "string", + "description": "MIME type (e.g. 
\"audio/wav\", \"audio/webm\")" + }, + "provider": { + "type": "string", + "description": "Provider to use: \"openai\", \"groq\", \"elevenlabs\", or \"local\"" + } + }, + "required": [ + "audio", + "mimeType", + "provider" + ], + "description": "Transcribe audio via a dictation provider.", + "x-side": "agent", + "x-method": "_goose/dictation/transcribe" + }, + "DictationTranscribeResponse": { + "type": "object", + "properties": { + "text": { + "type": "string" + } + }, + "required": [ + "text" + ], + "description": "Transcription result.", + "x-side": "agent", + "x-method": "_goose/dictation/transcribe" + }, + "DictationConfigRequest": { + "type": "object", + "description": "Get the configuration status of all dictation providers.", + "x-side": "agent", + "x-method": "_goose/dictation/config" + }, + "DictationConfigResponse": { + "type": "object", + "properties": { + "providers": { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/DictationProviderStatusEntry" + } + } + }, + "required": [ + "providers" + ], + "description": "Dictation config response — map of provider name to status.", + "x-side": "agent", + "x-method": "_goose/dictation/config" + }, + "DictationProviderStatusEntry": { + "type": "object", + "properties": { + "configured": { + "type": "boolean" + }, + "host": { + "type": [ + "string", + "null" + ] + }, + "description": { + "type": "string" + }, + "usesProviderConfig": { + "type": "boolean" + }, + "settingsPath": { + "type": [ + "string", + "null" + ] + }, + "configKey": { + "type": [ + "string", + "null" + ] + }, + "modelConfigKey": { + "type": [ + "string", + "null" + ] + }, + "defaultModel": { + "type": [ + "string", + "null" + ] + }, + "selectedModel": { + "type": [ + "string", + "null" + ] + }, + "availableModels": { + "type": "array", + "items": { + "$ref": "#/$defs/DictationModelOption" + }, + "default": [] + } + }, + "required": [ + "configured", + "description", + "usesProviderConfig" + ], + "description": "Per-provider configuration status." + }, + "DictationModelOption": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "label": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": [ + "id", + "label", + "description" + ] + }, "ExtRequest": { "properties": { "id": { @@ -807,6 +951,24 @@ ], "description": "Params for _goose/session/unarchive", "title": "UnarchiveSessionRequest" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationTranscribeRequest" + } + ], + "description": "Params for _goose/dictation/transcribe", + "title": "DictationTranscribeRequest" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationConfigRequest" + } + ], + "description": "Params for _goose/dictation/config", + "title": "DictationConfigRequest" } ] }, @@ -933,6 +1095,22 @@ } ], "title": "ImportSessionResponse" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationTranscribeResponse" + } + ], + "title": "DictationTranscribeResponse" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationConfigResponse" + } + ], + "title": "DictationConfigResponse" } ] }, From 0332e18e76c42df0fb131f36cfa3db3e182f89de Mon Sep 17 00:00:00 2001 From: tulsi Date: Thu, 16 Apr 2026 11:17:16 -0700 Subject: [PATCH 03/30] refactor(goose2): call dictation config/transcribe via SDK client Replaces Tauri invoke() with client.goose.GooseDictationConfig() and GooseDictationTranscribe() for the two ACP methods registered on the goose server. 
Matches the post-8549/8582 pattern: frontend talks directly to goose
serve over WebSocket, no Tauri middleware.

The remaining seven functions in dictation.ts still call invoke() for
Tauri commands that no longer exist; those migrate to ACP methods added
in a later commit.
---
 .../shared/api/__tests__/dictation.test.ts | 49 +++++++++++++++++++
 ui/goose2/src/shared/api/dictation.ts      | 16 +++---
 2 files changed, 58 insertions(+), 7 deletions(-)
 create mode 100644 ui/goose2/src/shared/api/__tests__/dictation.test.ts

diff --git a/ui/goose2/src/shared/api/__tests__/dictation.test.ts b/ui/goose2/src/shared/api/__tests__/dictation.test.ts
new file mode 100644
index 000000000000..b4d7cd501237
--- /dev/null
+++ b/ui/goose2/src/shared/api/__tests__/dictation.test.ts
@@ -0,0 +1,49 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { getDictationConfig, transcribeDictation } from "../dictation";
+import { getClient } from "../acpConnection";
+
+vi.mock("../acpConnection", () => ({
+  getClient: vi.fn(),
+}));
+
+describe("dictation SDK wiring", () => {
+  let client: any;
+  beforeEach(() => {
+    client = {
+      goose: {
+        GooseDictationConfig: vi.fn().mockResolvedValue({
+          providers: {
+            openai: {
+              configured: true,
+              description: "OpenAI transcription",
+              usesProviderConfig: true,
+              availableModels: [],
+            },
+          },
+        }),
+        GooseDictationTranscribe: vi.fn().mockResolvedValue({ text: "hello" }),
+      },
+    };
+    vi.mocked(getClient).mockResolvedValue(client);
+  });
+
+  it("getDictationConfig calls GooseDictationConfig and returns providers map", async () => {
+    const result = await getDictationConfig();
+    expect(client.goose.GooseDictationConfig).toHaveBeenCalledWith({});
+    expect(result.openai.configured).toBe(true);
+  });
+
+  it("transcribeDictation forwards audio + mimeType + provider", async () => {
+    const result = await transcribeDictation({
+      audio: "base64==",
+      mimeType: "audio/webm",
+      provider: "openai" as any,
+    });
+    expect(client.goose.GooseDictationTranscribe).toHaveBeenCalledWith({
+      audio: "base64==",
+      mimeType: "audio/webm",
+      provider: "openai",
+    });
+    expect(result.text).toBe("hello");
+  });
+});
diff --git a/ui/goose2/src/shared/api/dictation.ts b/ui/goose2/src/shared/api/dictation.ts
index 4473225b2684..793503f3c1dc 100644
--- a/ui/goose2/src/shared/api/dictation.ts
+++ b/ui/goose2/src/shared/api/dictation.ts
@@ -7,11 +7,14 @@ import type {
   MicrophonePermissionStatus,
   WhisperModelStatus,
 } from "@/shared/types/dictation";
+import { getClient } from "./acpConnection";
 
 export async function getDictationConfig(): Promise<
   Record<DictationProvider, DictationProviderStatus>
 > {
-  return invoke("get_dictation_config");
+  const client = await getClient();
+  const response = await client.goose.GooseDictationConfig({});
+  return response.providers as Record<DictationProvider, DictationProviderStatus>;
 }
 
 export async function transcribeDictation(request: {
@@ -19,12 +22,11 @@ export async function transcribeDictation(request: {
   mimeType: string;
   provider: DictationProvider;
 }): Promise<DictationTranscribeResponse> {
-  return invoke("transcribe_dictation", {
-    request: {
-      audio: request.audio,
-      mimeType: request.mimeType,
-      provider: request.provider,
-    },
+  const client = await getClient();
+  return client.goose.GooseDictationTranscribe({
+    audio: request.audio,
+    mimeType: request.mimeType,
+    provider: request.provider,
   });
 }

From 72601fbc9e645c4b9b201d4d778cd5e658a950da Mon Sep 17 00:00:00 2001
From: tulsi
Date: Thu, 16 Apr 2026 11:34:33 -0700
Subject: [PATCH 04/30] feat(goose-acp): add dictation model management ACP
 methods

Adds six custom methods so the goose2 frontend can list,
download, track, cancel, delete, and select local Whisper models through the same WebSocket channel it already uses for transcription: _goose/dictation/models/list _goose/dictation/models/download _goose/dictation/models/download/progress _goose/dictation/models/cancel _goose/dictation/models/delete _goose/dictation/model/select All local-model operations are gated on the local-inference feature; without it they return "Local inference not enabled". The select method accepts any dictation provider (openai, groq, elevenlabs, local) and writes to the appropriate config key. Replaces the previous plan to expose these as Tauri commands -- following the post-8549/8582 pattern of ACP-from-frontend-direct. Signed-off-by: tulsi --- crates/goose-acp/acp-meta.json | 30 +++ crates/goose-acp/acp-schema.json | 249 ++++++++++++++++++++++++ crates/goose-acp/src/server.rs | 233 ++++++++++++++++++++-- crates/goose-sdk/src/custom_requests.rs | 88 +++++++++ 4 files changed, 586 insertions(+), 14 deletions(-) diff --git a/crates/goose-acp/acp-meta.json b/crates/goose-acp/acp-meta.json index 3cd63e5f726f..75f28ef60a98 100644 --- a/crates/goose-acp/acp-meta.json +++ b/crates/goose-acp/acp-meta.json @@ -114,6 +114,36 @@ "method": "_goose/dictation/config", "requestType": "DictationConfigRequest", "responseType": "DictationConfigResponse" + }, + { + "method": "_goose/dictation/models/list", + "requestType": "DictationModelsListRequest", + "responseType": "DictationModelsListResponse" + }, + { + "method": "_goose/dictation/models/download", + "requestType": "DictationModelDownloadRequest", + "responseType": "EmptyResponse" + }, + { + "method": "_goose/dictation/models/download/progress", + "requestType": "DictationModelDownloadProgressRequest", + "responseType": "DictationModelDownloadProgressResponse" + }, + { + "method": "_goose/dictation/models/cancel", + "requestType": "DictationModelCancelRequest", + "responseType": "EmptyResponse" + }, + { + "method": "_goose/dictation/models/delete", + "requestType": "DictationModelDeleteRequest", + "responseType": "EmptyResponse" + }, + { + "method": "_goose/dictation/model/select", + "requestType": "DictationModelSelectRequest", + "responseType": "EmptyResponse" } ] } diff --git a/crates/goose-acp/acp-schema.json b/crates/goose-acp/acp-schema.json index 547b93a6f009..821de4145e74 100644 --- a/crates/goose-acp/acp-schema.json +++ b/crates/goose-acp/acp-schema.json @@ -751,6 +751,185 @@ "description" ] }, + "DictationModelsListRequest": { + "type": "object", + "description": "List available local Whisper models with their download status.", + "x-side": "agent", + "x-method": "_goose/dictation/models/list" + }, + "DictationModelsListResponse": { + "type": "object", + "properties": { + "models": { + "type": "array", + "items": { + "$ref": "#/$defs/DictationLocalModelStatus" + } + } + }, + "required": [ + "models" + ], + "x-side": "agent", + "x-method": "_goose/dictation/models/list" + }, + "DictationLocalModelStatus": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "label": { + "type": "string" + }, + "description": { + "type": "string" + }, + "sizeMb": { + "type": "integer", + "minimum": 0 + }, + "downloaded": { + "type": "boolean" + }, + "downloadInProgress": { + "type": "boolean" + } + }, + "required": [ + "id", + "label", + "description", + "sizeMb", + "downloaded", + "downloadInProgress" + ] + }, + "DictationModelDownloadRequest": { + "type": "object", + "properties": { + "modelId": { + "type": "string" + } + }, + "required": [ + "modelId" + 
], + "description": "Kick off a background download of a local Whisper model.", + "x-side": "agent", + "x-method": "_goose/dictation/models/download" + }, + "DictationModelDownloadProgressRequest": { + "type": "object", + "properties": { + "modelId": { + "type": "string" + } + }, + "required": [ + "modelId" + ], + "description": "Poll the progress of an in-flight download.", + "x-side": "agent", + "x-method": "_goose/dictation/models/download/progress" + }, + "DictationModelDownloadProgressResponse": { + "type": "object", + "properties": { + "progress": { + "anyOf": [ + { + "$ref": "#/$defs/DictationDownloadProgress" + }, + { + "type": "null" + } + ], + "description": "None when no download is active for this model id." + } + }, + "x-side": "agent", + "x-method": "_goose/dictation/models/download/progress" + }, + "DictationDownloadProgress": { + "type": "object", + "properties": { + "bytesDownloaded": { + "type": "integer", + "minimum": 0 + }, + "totalBytes": { + "type": "integer", + "minimum": 0 + }, + "progressPercent": { + "type": "number", + "format": "float" + }, + "status": { + "type": "string", + "description": "serde lowercase of DownloadStatus: \"downloading\" | \"completed\" | \"failed\" | \"cancelled\"" + }, + "error": { + "type": [ + "string", + "null" + ] + } + }, + "required": [ + "bytesDownloaded", + "totalBytes", + "progressPercent", + "status" + ] + }, + "DictationModelCancelRequest": { + "type": "object", + "properties": { + "modelId": { + "type": "string" + } + }, + "required": [ + "modelId" + ], + "description": "Cancel an in-flight download.", + "x-side": "agent", + "x-method": "_goose/dictation/models/cancel" + }, + "DictationModelDeleteRequest": { + "type": "object", + "properties": { + "modelId": { + "type": "string" + } + }, + "required": [ + "modelId" + ], + "description": "Delete a downloaded local Whisper model from disk.", + "x-side": "agent", + "x-method": "_goose/dictation/models/delete" + }, + "DictationModelSelectRequest": { + "type": "object", + "properties": { + "provider": { + "type": "string" + }, + "modelId": { + "type": "string" + } + }, + "required": [ + "provider", + "modelId" + ], + "description": "Persist the user's model selection for a given provider.", + "x-side": "agent", + "x-method": "_goose/dictation/model/select" + }, "ExtRequest": { "properties": { "id": { @@ -969,6 +1148,60 @@ ], "description": "Params for _goose/dictation/config", "title": "DictationConfigRequest" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelsListRequest" + } + ], + "description": "Params for _goose/dictation/models/list", + "title": "DictationModelsListRequest" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelDownloadRequest" + } + ], + "description": "Params for _goose/dictation/models/download", + "title": "DictationModelDownloadRequest" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelDownloadProgressRequest" + } + ], + "description": "Params for _goose/dictation/models/download/progress", + "title": "DictationModelDownloadProgressRequest" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelCancelRequest" + } + ], + "description": "Params for _goose/dictation/models/cancel", + "title": "DictationModelCancelRequest" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelDeleteRequest" + } + ], + "description": "Params for _goose/dictation/models/delete", + "title": "DictationModelDeleteRequest" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelSelectRequest" + } + ], + "description": "Params for 
_goose/dictation/model/select", + "title": "DictationModelSelectRequest" } ] }, @@ -1111,6 +1344,22 @@ } ], "title": "DictationConfigResponse" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelsListResponse" + } + ], + "title": "DictationModelsListResponse" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelDownloadProgressResponse" + } + ], + "title": "DictationModelDownloadProgressResponse" } ] }, diff --git a/crates/goose-acp/src/server.rs b/crates/goose-acp/src/server.rs index 6176b02f8f85..40cf27bcef7d 100644 --- a/crates/goose-acp/src/server.rs +++ b/crates/goose-acp/src/server.rs @@ -75,6 +75,9 @@ pub type AcpProviderFactory = Arc< const DEFAULT_PROVIDER_ID: &str = "goose"; const DEFAULT_PROVIDER_LABEL: &str = "Goose (Default)"; +const OPENAI_TRANSCRIPTION_MODEL_CONFIG_KEY: &str = "OPENAI_TRANSCRIPTION_MODEL"; +const GROQ_TRANSCRIPTION_MODEL_CONFIG_KEY: &str = "GROQ_TRANSCRIPTION_MODEL"; +const ELEVENLABS_TRANSCRIPTION_MODEL_CONFIG_KEY: &str = "ELEVENLABS_TRANSCRIPTION_MODEL"; const OPENAI_TRANSCRIPTION_MODEL: &str = "whisper-1"; const GROQ_TRANSCRIPTION_MODEL: &str = "whisper-large-v3-turbo"; const ELEVENLABS_TRANSCRIPTION_MODEL: &str = "scribe_v1"; @@ -2921,6 +2924,13 @@ impl GooseAcpAgent { req: DictationTranscribeRequest, ) -> Result { use base64::{engine::general_purpose::STANDARD as BASE64, Engine}; + let config = goose::config::Config::global(); + + #[cfg(not(feature = "local-inference"))] + if req.provider == "local" { + return Err(sacp::Error::invalid_params() + .data("Local inference is not available in this build")); + } let provider: DictationProvider = serde_json::from_value(serde_json::Value::String( req.provider.clone(), @@ -2952,10 +2962,12 @@ impl GooseAcpAgent { let text = match provider { DictationProvider::OpenAI => { + let model = dictation_selected_model(config, DictationProvider::OpenAI) + .unwrap_or_else(|| OPENAI_TRANSCRIPTION_MODEL.to_string()); transcribe_with_provider( DictationProvider::OpenAI, "model".to_string(), - "whisper-1".to_string(), + model, audio_bytes, extension, &req.mime_type, @@ -2963,10 +2975,12 @@ impl GooseAcpAgent { .await } DictationProvider::Groq => { + let model = dictation_selected_model(config, DictationProvider::Groq) + .unwrap_or_else(|| GROQ_TRANSCRIPTION_MODEL.to_string()); transcribe_with_provider( DictationProvider::Groq, "model".to_string(), - "whisper-large-v3-turbo".to_string(), + model, audio_bytes, extension, &req.mime_type, @@ -2974,10 +2988,12 @@ impl GooseAcpAgent { .await } DictationProvider::ElevenLabs => { + let model = dictation_selected_model(config, DictationProvider::ElevenLabs) + .unwrap_or_else(|| ELEVENLABS_TRANSCRIPTION_MODEL.to_string()); transcribe_with_provider( DictationProvider::ElevenLabs, "model_id".to_string(), - "scribe_v1".to_string(), + model, audio_bytes, extension, &req.mime_type, @@ -2986,11 +3002,6 @@ impl GooseAcpAgent { } #[cfg(feature = "local-inference")] DictationProvider::Local => transcribe_local(audio_bytes).await, - #[cfg(not(feature = "local-inference"))] - DictationProvider::Local => { - return Err(sacp::Error::invalid_params() - .data("Local inference is not available in this build")); - } } .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?; @@ -3043,15 +3054,202 @@ impl GooseAcpAgent { Ok(DictationConfigResponse { providers }) } + + #[custom_method(DictationModelsListRequest)] + async fn on_dictation_models_list( + &self, + _req: DictationModelsListRequest, + ) -> Result { + #[cfg(feature = "local-inference")] + { + use 
goose::download_manager::{get_download_manager, DownloadStatus}; + + let manager = get_download_manager(); + let models = whisper::available_models() + .iter() + .map(|model| DictationLocalModelStatus { + id: model.id.to_string(), + label: model.id.to_string(), + description: model.description.to_string(), + size_mb: model.size_mb, + downloaded: model.is_downloaded(), + download_in_progress: manager + .get_progress(model.id) + .map(|progress| progress.status == DownloadStatus::Downloading) + .unwrap_or(false), + }) + .collect(); + + return Ok(DictationModelsListResponse { models }); + } + + #[cfg(not(feature = "local-inference"))] + Ok(DictationModelsListResponse::default()) + } + + #[custom_method(DictationModelDownloadRequest)] + async fn on_dictation_model_download( + &self, + _req: DictationModelDownloadRequest, + ) -> Result { + #[cfg(feature = "local-inference")] + { + use goose::download_manager::get_download_manager; + + let model = whisper::get_model(&_req.model_id) + .ok_or_else(|| sacp::Error::invalid_params().data("Unknown model id"))?; + let manager = get_download_manager(); + let model_id_for_config = model.id.to_string(); + + manager + .download_model( + model.id.to_string(), + model.url.to_string(), + model.local_path(), + Some(Box::new(move || { + if let Err(e) = goose::config::Config::global().set_param( + whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY, + model_id_for_config.clone(), + ) { + error!("Failed to save LOCAL_WHISPER_MODEL after download: {}", e); + } + })), + ) + .await + .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?; + + return Ok(EmptyResponse {}); + } + + #[cfg(not(feature = "local-inference"))] + Err(sacp::Error::invalid_params().data("Local inference not enabled")) + } + + #[custom_method(DictationModelDownloadProgressRequest)] + async fn on_dictation_model_download_progress( + &self, + _req: DictationModelDownloadProgressRequest, + ) -> Result { + #[cfg(feature = "local-inference")] + { + use goose::download_manager::get_download_manager; + + let manager = get_download_manager(); + let progress = + manager + .get_progress(&_req.model_id) + .map(|progress| DictationDownloadProgress { + bytes_downloaded: progress.bytes_downloaded, + total_bytes: progress.total_bytes, + progress_percent: progress.progress_percent, + status: serde_json::to_value(&progress.status) + .ok() + .and_then(|value| value.as_str().map(ToOwned::to_owned)) + .unwrap_or_else(|| "unknown".to_string()), + error: progress.error, + }); + + return Ok(DictationModelDownloadProgressResponse { progress }); + } + + #[cfg(not(feature = "local-inference"))] + Ok(DictationModelDownloadProgressResponse { progress: None }) + } + + #[custom_method(DictationModelCancelRequest)] + async fn on_dictation_model_cancel( + &self, + _req: DictationModelCancelRequest, + ) -> Result { + #[cfg(feature = "local-inference")] + { + use goose::download_manager::get_download_manager; + + let manager = get_download_manager(); + manager + .cancel_download(&_req.model_id) + .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?; + + return Ok(EmptyResponse {}); + } + + #[cfg(not(feature = "local-inference"))] + Err(sacp::Error::invalid_params().data("Local inference not enabled")) + } + + #[custom_method(DictationModelDeleteRequest)] + async fn on_dictation_model_delete( + &self, + _req: DictationModelDeleteRequest, + ) -> Result { + #[cfg(feature = "local-inference")] + { + let model = whisper::get_model(&_req.model_id) + .ok_or_else(|| sacp::Error::invalid_params().data("Unknown model id"))?; + 
let path = model.local_path(); + + if !path.exists() { + return Err(sacp::Error::invalid_params().data("Model not downloaded")); + } + + std::fs::remove_file(path) + .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?; + + return Ok(EmptyResponse {}); + } + + #[cfg(not(feature = "local-inference"))] + Err(sacp::Error::invalid_params().data("Local inference not enabled")) + } + + #[custom_method(DictationModelSelectRequest)] + async fn on_dictation_model_select( + &self, + req: DictationModelSelectRequest, + ) -> Result { + #[cfg(not(feature = "local-inference"))] + if req.provider == "local" { + return Err(sacp::Error::invalid_params().data("Local inference not enabled")); + } + + let provider: DictationProvider = serde_json::from_value(serde_json::Value::String( + req.provider.clone(), + )) + .map_err(|_| { + sacp::Error::invalid_params().data(format!("Unknown provider: {}", req.provider)) + })?; + + let key = match provider { + DictationProvider::OpenAI => OPENAI_TRANSCRIPTION_MODEL_CONFIG_KEY, + DictationProvider::Groq => GROQ_TRANSCRIPTION_MODEL_CONFIG_KEY, + DictationProvider::ElevenLabs => ELEVENLABS_TRANSCRIPTION_MODEL_CONFIG_KEY, + #[cfg(feature = "local-inference")] + DictationProvider::Local => { + if whisper::get_model(&req.model_id).is_none() { + return Err(sacp::Error::invalid_params().data("Unknown model id")); + } + whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY + } + }; + + goose::config::Config::global() + .set_param(key, req.model_id) + .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?; + + Ok(EmptyResponse {}) + } } fn dictation_model_config_key(provider: DictationProvider) -> Option { - #[cfg(feature = "local-inference")] - if provider == DictationProvider::Local { - return Some(whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY.to_string()); + match provider { + DictationProvider::OpenAI => Some(OPENAI_TRANSCRIPTION_MODEL_CONFIG_KEY.to_string()), + DictationProvider::Groq => Some(GROQ_TRANSCRIPTION_MODEL_CONFIG_KEY.to_string()), + DictationProvider::ElevenLabs => { + Some(ELEVENLABS_TRANSCRIPTION_MODEL_CONFIG_KEY.to_string()) + } + #[cfg(feature = "local-inference")] + DictationProvider::Local => Some(whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY.to_string()), } - - None } fn dictation_default_model(provider: DictationProvider) -> Option { @@ -3075,7 +3273,14 @@ fn dictation_selected_model(config: &Config, provider: DictationProvider) -> Opt .or_else(|| dictation_default_model(provider)); } - dictation_default_model(provider) + dictation_model_config_key(provider) + .and_then(|key| { + config + .get(&key, false) + .ok() + .and_then(|value| value.as_str().map(str::to_owned)) + }) + .or_else(|| dictation_default_model(provider)) } fn dictation_available_models(provider: DictationProvider) -> Vec { diff --git a/crates/goose-sdk/src/custom_requests.rs b/crates/goose-sdk/src/custom_requests.rs index 46359100a3bf..af14fd9cc189 100644 --- a/crates/goose-sdk/src/custom_requests.rs +++ b/crates/goose-sdk/src/custom_requests.rs @@ -372,3 +372,91 @@ pub struct DictationConfigResponse { /// Empty success response for operations that return no data. #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)] pub struct EmptyResponse {} + +/// List available local Whisper models with their download status. 
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(
+    method = "_goose/dictation/models/list",
+    response = DictationModelsListResponse
+)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelsListRequest {}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelsListResponse {
+    pub models: Vec<DictationLocalModelStatus>,
+}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationLocalModelStatus {
+    pub id: String,
+    pub label: String,
+    pub description: String,
+    pub size_mb: u32,
+    pub downloaded: bool,
+    pub download_in_progress: bool,
+}
+
+/// Kick off a background download of a local Whisper model.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/models/download", response = EmptyResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelDownloadRequest {
+    pub model_id: String,
+}
+
+/// Poll the progress of an in-flight download.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(
+    method = "_goose/dictation/models/download/progress",
+    response = DictationModelDownloadProgressResponse
+)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelDownloadProgressRequest {
+    pub model_id: String,
+}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelDownloadProgressResponse {
+    /// None when no download is active for this model id.
+    pub progress: Option<DictationDownloadProgress>,
+}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationDownloadProgress {
+    pub bytes_downloaded: u64,
+    pub total_bytes: u64,
+    pub progress_percent: f32,
+    /// serde lowercase of DownloadStatus: "downloading" | "completed" | "failed" | "cancelled"
+    pub status: String,
+    pub error: Option<String>,
+}
+
+/// Cancel an in-flight download.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/models/cancel", response = EmptyResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelCancelRequest {
+    pub model_id: String,
+}
+
+/// Delete a downloaded local Whisper model from disk.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/models/delete", response = EmptyResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelDeleteRequest {
+    pub model_id: String,
+}
+
+/// Persist the user's model selection for a given provider.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/model/select", response = EmptyResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelSelectRequest {
+    pub provider: String,
+    pub model_id: String,
+}

From 1b9d22fed31082cc1c24169c0074d5e5843d2412 Mon Sep 17 00:00:00 2001
From: tulsi
Date: Thu, 16 Apr 2026 11:53:57 -0700
Subject: [PATCH 05/30] refactor(goose2): route local-model dictation through
 SDK client
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Migrates six dictation.ts functions off Tauri invoke() onto the
regenerated @aaif/goose-sdk client:

  saveDictationModelSelection, listDictationLocalModels,
  downloadDictationLocalModel, getDictationLocalModelDownloadProgress,
  cancelDictationLocalModelDownload, deleteDictationLocalModel

Leaves alone:

  saveDictationProviderSecret / deleteDictationProviderSecret — use the
  generic save_provider_field / delete_provider_config Tauri commands

  getMicrophonePermissionStatus / requestMicrophonePermission — OS-bound;
  browser APIs handle the mic prompt in VoiceInputSettings

Each migrated function uses a type cast at the SDK boundary because the
regenerated types don't fully overlap with the hand-written local types
(e.g., WhisperModelStatus has url/recommended fields the SDK's
DictationLocalModelStatus doesn't). Consumers that read missing fields
will get undefined at runtime; end-to-end verification in a later task
will surface any breakage.

Signed-off-by: tulsi
---
 .../shared/api/__tests__/dictation.test.ts | 87 ++++++++++++++++++-
 ui/goose2/src/shared/api/dictation.ts      | 22 +++--
 2 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/ui/goose2/src/shared/api/__tests__/dictation.test.ts b/ui/goose2/src/shared/api/__tests__/dictation.test.ts
index b4d7cd501237..27f501a851b8 100644
--- a/ui/goose2/src/shared/api/__tests__/dictation.test.ts
+++ b/ui/goose2/src/shared/api/__tests__/dictation.test.ts
@@ -1,5 +1,14 @@
 import { describe, it, expect, vi, beforeEach } from "vitest";
-import { getDictationConfig, transcribeDictation } from "../dictation";
+import {
+  cancelDictationLocalModelDownload,
+  deleteDictationLocalModel,
+  downloadDictationLocalModel,
+  getDictationConfig,
+  getDictationLocalModelDownloadProgress,
+  listDictationLocalModels,
+  saveDictationModelSelection,
+  transcribeDictation,
+} from "../dictation";
 import { getClient } from "../acpConnection";
@@ -46,4 +55,80 @@
     });
     expect(result.text).toBe("hello");
   });
+
+  it("saveDictationModelSelection calls GooseDictationModelSelect", async () => {
+    client.goose.GooseDictationModelSelect = vi.fn().mockResolvedValue({});
+    await saveDictationModelSelection("local" as any, "tiny");
+    expect(client.goose.GooseDictationModelSelect).toHaveBeenCalledWith({
+      provider: "local",
+      modelId: "tiny",
+    });
+  });
+
+  it("listDictationLocalModels returns the models array", async () => {
+    client.goose.GooseDictationModelsList = vi.fn().mockResolvedValue({
+      models: [
+        {
+          id: "tiny",
+          description: "Tiny",
+          sizeMb: 75,
+          downloaded: true,
+          downloadInProgress: false,
+        },
+      ],
+    });
+    const result = await listDictationLocalModels();
+    expect(client.goose.GooseDictationModelsList).toHaveBeenCalledWith({});
+    expect(result).toHaveLength(1);
+    expect(result[0].id).toBe("tiny");
+  });
+
+  it("downloadDictationLocalModel forwards modelId", async () => {
+    client.goose.GooseDictationModelsDownload =
+      vi.fn().mockResolvedValue({});
+    await downloadDictationLocalModel("tiny");
+    expect(client.goose.GooseDictationModelsDownload).toHaveBeenCalledWith({
+      modelId: "tiny",
+    });
+  });
+
+  it("getDictationLocalModelDownloadProgress returns progress or null", async () => {
+    client.goose.GooseDictationModelsDownloadProgress = vi.fn().mockResolvedValue({
+      progress: {
+        bytesDownloaded: 100,
+        totalBytes: 1000,
+        progressPercent: 10,
+        status: "downloading",
+        error: null,
+      },
+    });
+    const result = await getDictationLocalModelDownloadProgress("tiny");
+    expect(result?.bytesDownloaded).toBe(100);
+    expect(client.goose.GooseDictationModelsDownloadProgress).toHaveBeenCalledWith({
+      modelId: "tiny",
+    });
+  });
+
+  it("getDictationLocalModelDownloadProgress returns null when no download", async () => {
+    client.goose.GooseDictationModelsDownloadProgress = vi.fn().mockResolvedValue({
+      progress: undefined,
+    });
+    const result = await getDictationLocalModelDownloadProgress("tiny");
+    expect(result).toBeNull();
+  });
+
+  it("cancelDictationLocalModelDownload forwards modelId", async () => {
+    client.goose.GooseDictationModelsCancel = vi.fn().mockResolvedValue({});
+    await cancelDictationLocalModelDownload("tiny");
+    expect(client.goose.GooseDictationModelsCancel).toHaveBeenCalledWith({
+      modelId: "tiny",
+    });
+  });
+
+  it("deleteDictationLocalModel forwards modelId", async () => {
+    client.goose.GooseDictationModelsDelete = vi.fn().mockResolvedValue({});
+    await deleteDictationLocalModel("tiny");
+    expect(client.goose.GooseDictationModelsDelete).toHaveBeenCalledWith({
+      modelId: "tiny",
+    });
+  });
 });
diff --git a/ui/goose2/src/shared/api/dictation.ts b/ui/goose2/src/shared/api/dictation.ts
index 793503f3c1dc..0031d120fad0 100644
--- a/ui/goose2/src/shared/api/dictation.ts
+++ b/ui/goose2/src/shared/api/dictation.ts
@@ -34,7 +34,8 @@ export async function saveDictationModelSelection(
   provider: DictationProvider,
   modelId: string,
 ): Promise<void> {
-  return invoke("save_dictation_model_selection", { provider, modelId });
+  const client = await getClient();
+  await client.goose.GooseDictationModelSelect({ provider, modelId });
 }
 
 export async function saveDictationProviderSecret(
@@ -66,31 +67,40 @@ export async function deleteDictationProviderSecret(
 export async function listDictationLocalModels(): Promise<
   WhisperModelStatus[]
 > {
-  return invoke("list_dictation_local_models");
+  const client = await getClient();
+  const response = await client.goose.GooseDictationModelsList({});
+  return response.models as unknown as WhisperModelStatus[];
 }
 
 export async function downloadDictationLocalModel(
   modelId: string,
 ): Promise<void> {
-  return invoke("download_dictation_local_model", { modelId });
+  const client = await getClient();
+  await client.goose.GooseDictationModelsDownload({ modelId });
 }
 
 export async function getDictationLocalModelDownloadProgress(
   modelId: string,
 ): Promise<DictationDownloadProgress | null> {
-  return invoke("get_dictation_local_model_download_progress", { modelId });
+  const client = await getClient();
+  const response = await client.goose.GooseDictationModelsDownloadProgress({
+    modelId,
+  });
+  return (response.progress ?? null) as DictationDownloadProgress | null;
 }
 
 export async function cancelDictationLocalModelDownload(
   modelId: string,
 ): Promise<void> {
-  return invoke("cancel_dictation_local_model_download", { modelId });
+  const client = await getClient();
+  await client.goose.GooseDictationModelsCancel({ modelId });
 }
 
 export async function deleteDictationLocalModel(
   modelId: string,
 ): Promise<void> {
-  return invoke("delete_dictation_local_model", { modelId });
+  const client = await getClient();
+  await client.goose.GooseDictationModelsDelete({ modelId });
 }

From d33a6ba9a0efaabf6eb1b122569e2c26dc4a673b Mon Sep 17 00:00:00 2001
From: tulsi
Date: Thu, 16 Apr 2026 11:56:13 -0700
Subject: [PATCH 06/30] refactor(goose2): remove dead microphone permission
 exports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

getMicrophonePermissionStatus and requestMicrophonePermission had zero
callers after the voice-input work settled — VoiceInputSettings derives
permission status directly from the browser's
navigator.mediaDevices.getUserMedia rather than routing through Tauri.
Drop the exports and the now-unused MicrophonePermissionStatus type
import.

The type itself stays defined in shared/types/dictation.ts for any
future consumer; only the Tauri-routed helpers are removed.
---
 ui/goose2/src/shared/api/dictation.ts | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/ui/goose2/src/shared/api/dictation.ts b/ui/goose2/src/shared/api/dictation.ts
index 0031d120fad0..3ace459b8523 100644
--- a/ui/goose2/src/shared/api/dictation.ts
+++ b/ui/goose2/src/shared/api/dictation.ts
@@ -4,7 +4,6 @@ import type {
   DictationProvider,
   DictationProviderStatus,
   DictationTranscribeResponse,
-  MicrophonePermissionStatus,
   WhisperModelStatus,
 } from "@/shared/types/dictation";
 import { getClient } from "./acpConnection";
@@ -102,11 +101,3 @@ export async function deleteDictationLocalModel(
   const client = await getClient();
   await client.goose.GooseDictationModelsDelete({ modelId });
 }
-
-export async function getMicrophonePermissionStatus(): Promise<MicrophonePermissionStatus> {
-  return invoke("get_microphone_permission_status");
-}
-
-export async function requestMicrophonePermission(): Promise<MicrophonePermissionStatus> {
-  return invoke("request_microphone_permission");
-}

From e47fdf9979ea877caedcdd72e380561513e06244 Mon Sep 17 00:00:00 2001
From: tulsi
Date: Thu, 16 Apr 2026 12:14:08 -0700
Subject: [PATCH 07/30] feat(goose2): local Whisper model download/select/delete
 UI

Replaces the "Local model download is not yet available" placeholder in
VoiceInputSettings with a working LocalWhisperModels component that
drives the six ACP methods added upstream: list, download, progress,
cancel, delete, select.

Per-row UI state machine:
- not downloaded -> Download button
- downloading -> progress bar + Cancel button (polls every 750ms)
- downloaded + selected -> "Selected" badge + Delete
- downloaded + unselected -> Select + Delete

Progress polling auto-stops when no active downloads remain (sketched
below). Download completion refreshes the model list and notifies the
parent config so the mic button in chat enables without a manual reload.

i18n keys added for EN and ES; obsolete localModelUnavailable key left
in place (unused now) to avoid gratuitous deletion.
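A minimal sketch of that polling loop, assuming only the helpers exported
from ui/goose2/src/shared/api/dictation.ts; the pollDownload and onDone
names are illustrative and do not appear in the component, which inlines
the same logic in a useEffect:

    import { getDictationLocalModelDownloadProgress } from "@/shared/api/dictation";

    const POLL_INTERVAL_MS = 750;

    // Poll one model's download until it leaves the "downloading" state,
    // then fire onDone so the caller can refresh its model list and config.
    export function pollDownload(modelId: string, onDone: () => void): () => void {
      const interval = setInterval(() => {
        void (async () => {
          const progress = await getDictationLocalModelDownloadProgress(modelId);
          // Null progress means the download manager no longer tracks this id.
          if (!progress || progress.status !== "downloading") {
            clearInterval(interval);
            onDone();
          }
        })();
      }, POLL_INTERVAL_MS);
      // The returned cleanup lets an unmounting component stop polling early.
      return () => clearInterval(interval);
    }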
--- .../settings/ui/LocalWhisperModels.tsx | 324 ++++++++++++++++++ .../settings/ui/VoiceInputSettings.tsx | 14 +- .../src/shared/i18n/locales/en/settings.json | 5 + .../src/shared/i18n/locales/es/settings.json | 5 + 4 files changed, 340 insertions(+), 8 deletions(-) create mode 100644 ui/goose2/src/features/settings/ui/LocalWhisperModels.tsx diff --git a/ui/goose2/src/features/settings/ui/LocalWhisperModels.tsx b/ui/goose2/src/features/settings/ui/LocalWhisperModels.tsx new file mode 100644 index 000000000000..4fa3f8dadb56 --- /dev/null +++ b/ui/goose2/src/features/settings/ui/LocalWhisperModels.tsx @@ -0,0 +1,324 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { useTranslation } from "react-i18next"; +import { Button } from "@/shared/ui/button"; +import { + cancelDictationLocalModelDownload, + deleteDictationLocalModel, + downloadDictationLocalModel, + getDictationLocalModelDownloadProgress, + listDictationLocalModels, +} from "@/shared/api/dictation"; + +type LocalModel = { + id: string; + description: string; + sizeMb: number; + downloaded: boolean; + downloadInProgress: boolean; +}; + +type DownloadProgress = { + bytesDownloaded: number; + totalBytes: number; + progressPercent: number; + status: string; + error?: string | null; +}; + +const POLL_INTERVAL_MS = 750; + +interface LocalWhisperModelsProps { + selectedModelId: string; + onSelectModel: (modelId: string) => void | Promise; + onModelsChanged: () => void | Promise; +} + +export function LocalWhisperModels({ + selectedModelId, + onSelectModel, + onModelsChanged, +}: LocalWhisperModelsProps) { + const { t } = useTranslation(["settings", "common"]); + const [models, setModels] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [downloadingIds, setDownloadingIds] = useState>(new Set()); + const [progresses, setProgresses] = useState>( + new Map(), + ); + const onModelsChangedRef = useRef(onModelsChanged); + onModelsChangedRef.current = onModelsChanged; + + const refresh = useCallback(async () => { + try { + const list = + (await listDictationLocalModels()) as unknown as LocalModel[]; + setModels(list); + setDownloadingIds((prev) => { + const next = new Set(prev); + for (const m of list) { + if (m.downloadInProgress) next.add(m.id); + } + return next; + }); + } catch (err) { + setError( + err instanceof Error + ? 
err.message + : t("general.voiceInput.loadError"), + ); + } + }, [t]); + + useEffect(() => { + const load = async () => { + setLoading(true); + setError(null); + await refresh(); + setLoading(false); + }; + void load(); + }, [refresh]); + + useEffect(() => { + if (downloadingIds.size === 0) return; + let cancelled = false; + + const tick = async () => { + const next = new Map(); + const stillActive = new Set(); + const finishedIds: string[] = []; + + for (const id of downloadingIds) { + try { + const progress = + (await getDictationLocalModelDownloadProgress( + id, + )) as unknown as DownloadProgress | null; + if (!progress) { + finishedIds.push(id); + continue; + } + next.set(id, progress); + if (progress.status === "downloading") { + stillActive.add(id); + } else { + finishedIds.push(id); + } + } catch { + stillActive.add(id); + } + } + if (cancelled) return; + setProgresses(next); + if (finishedIds.length > 0) { + await refresh(); + await onModelsChangedRef.current(); + } + setDownloadingIds(stillActive); + }; + + const interval = window.setInterval(() => { + void tick(); + }, POLL_INTERVAL_MS); + return () => { + cancelled = true; + window.clearInterval(interval); + }; + }, [downloadingIds, refresh]); + + const startDownload = useCallback( + async (modelId: string) => { + setError(null); + try { + await downloadDictationLocalModel(modelId); + setDownloadingIds((prev) => new Set(prev).add(modelId)); + } catch (err) { + setError( + err instanceof Error + ? err.message + : t("general.voiceInput.saveError"), + ); + } + }, + [t], + ); + + const cancelDownload = useCallback( + async (modelId: string) => { + setError(null); + try { + await cancelDictationLocalModelDownload(modelId); + } catch (err) { + setError( + err instanceof Error + ? err.message + : t("general.voiceInput.saveError"), + ); + } finally { + setDownloadingIds((prev) => { + const next = new Set(prev); + next.delete(modelId); + return next; + }); + await refresh(); + } + }, + [refresh, t], + ); + + const deleteModel = useCallback( + async (modelId: string) => { + setError(null); + try { + await deleteDictationLocalModel(modelId); + await refresh(); + await onModelsChanged(); + } catch (err) { + setError( + err instanceof Error + ? err.message + : t("general.voiceInput.deleteError"), + ); + } + }, + [onModelsChanged, refresh, t], + ); + + if (loading) { + return ( +
+

+ {t("common:labels.loading")} +

+
+ ); + } + + if (models.length === 0) { + return ( +
+

+ {t("general.voiceInput.noLocalModels")} +

+
+ ); + } + + return ( +
+
+

+ {t("general.voiceInput.localModelLabel")} +

+

+ {t("general.voiceInput.localModelDescription")} +

+
+ +
    + {models.map((model) => { + const progress = progresses.get(model.id); + const isDownloading = + downloadingIds.has(model.id) || + progress?.status === "downloading" || + model.downloadInProgress; + const isSelected = + model.downloaded && model.id === selectedModelId; + return ( +
  • +
    +
    +

    + {model.id} +

    + + {model.sizeMb} MB + + {isSelected ? ( + + {t("general.voiceInput.selectedModel")} + + ) : null} +
    +

    + {model.description} +

    + {isDownloading && progress ? ( +
    +
    +
    +
    +

    + {t("general.voiceInput.downloadProgress", { + percent: Math.round(progress.progressPercent), + })} +

    +
    + ) : null} + {progress?.status === "failed" && progress.error ? ( +

    + {progress.error} +

    + ) : null} +
    + +
    + {isDownloading ? ( + + ) : model.downloaded ? ( + <> + {!isSelected ? ( + + ) : null} + + + ) : ( + + )} +
    +
  • + ); + })} +
+ + {error ?

{error}

: null} +
+ ); +} diff --git a/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx index 8ccc14908ecb..cc485cbb4c75 100644 --- a/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx +++ b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx @@ -17,6 +17,7 @@ import type { } from "@/shared/types/dictation"; import { useAudioDevices } from "@/shared/ui/ai-elements/mic-selector"; import { Button } from "@/shared/ui/button"; +import { LocalWhisperModels } from "./LocalWhisperModels"; import { Input } from "@/shared/ui/input"; import { Select, @@ -338,14 +339,11 @@ export function VoiceInputSettings() { ) : null} {selectedProvider === "local" ? ( -
-

- {t("general.voiceInput.localModelLabel")} -

-

- {t("general.voiceInput.localModelUnavailable")} -

-
+ handleModelChange(modelId)} + onModelsChanged={() => refreshConfig()} + /> ) : (selectedStatus.availableModels ?? []).length > 0 ? (

diff --git a/ui/goose2/src/shared/i18n/locales/en/settings.json b/ui/goose2/src/shared/i18n/locales/en/settings.json index e4c15409aa29..6dfbd824ace5 100644 --- a/ui/goose2/src/shared/i18n/locales/en/settings.json +++ b/ui/goose2/src/shared/i18n/locales/en/settings.json @@ -142,8 +142,13 @@ "updateApiKey": "Update API key", "removeApiKey": "Remove API key", "localModelLabel": "Local Whisper Model", + "localModelDescription": "Download a Whisper model to run transcription locally. Selecting a model sets it as your active local transcription model.", "localModelUnavailable": "Local model download is not yet available. Use the Goose CLI to download a Whisper model first.", + "noLocalModels": "No local Whisper models available.", "download": "Download", + "selectModel": "Select", + "selectedModel": "Selected", + "deleteModel": "Delete", "recommended": "Recommended", "microphoneLabel": "Microphone", "microphoneDescription": "Choose which microphone to use for voice input.", diff --git a/ui/goose2/src/shared/i18n/locales/es/settings.json b/ui/goose2/src/shared/i18n/locales/es/settings.json index 33bef38d3078..2178a44e80fb 100644 --- a/ui/goose2/src/shared/i18n/locales/es/settings.json +++ b/ui/goose2/src/shared/i18n/locales/es/settings.json @@ -142,8 +142,13 @@ "updateApiKey": "Actualizar clave API", "removeApiKey": "Eliminar clave API", "localModelLabel": "Modelo Whisper local", + "localModelDescription": "Descarga un modelo Whisper para transcribir localmente. Seleccionar un modelo lo establece como tu modelo de transcripción local activo.", "localModelUnavailable": "La descarga de modelos locales aún no está disponible. Usa la CLI de Goose para descargar un modelo Whisper primero.", + "noLocalModels": "No hay modelos Whisper locales disponibles.", "download": "Descargar", + "selectModel": "Seleccionar", + "selectedModel": "Seleccionado", + "deleteModel": "Eliminar", "recommended": "Recomendado", "microphoneLabel": "Micrófono", "microphoneDescription": "Elige qué micrófono usar para la entrada de voz.", From 608e812864b06adb57c3759d3cf0b66b1efae1b0 Mon Sep 17 00:00:00 2001 From: tulsi Date: Thu, 16 Apr 2026 12:31:13 -0700 Subject: [PATCH 08/30] fix(goose2): notify chat input when local model download/delete completes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The onModelsChanged callback only called refreshConfig() — it didn't emit notifyVoiceDictationConfigChanged(). Result: after downloading a local Whisper model, the chat page's useVoiceDictation hook kept stale providerStatuses and left the mic button disabled until the window was reloaded. Symmetric with how handleModelChange already notifies on cloud-provider model changes. Now both paths emit the same event. --- ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx index cc485cbb4c75..399897a9683d 100644 --- a/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx +++ b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx @@ -342,7 +342,10 @@ export function VoiceInputSettings() { handleModelChange(modelId)} - onModelsChanged={() => refreshConfig()} + onModelsChanged={async () => { + await refreshConfig(); + notifyVoiceDictationConfigChanged(); + }} /> ) : (selectedStatus.availableModels ?? []).length > 0 ? (

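Patch 08 leans on a small broadcast contract: any code path that mutates
dictation config calls notifyVoiceDictationConfigChanged(), and
useVoiceDictation re-fetches provider statuses when it hears the event. The
helper itself is not quoted in these patches, so the sketch below is an
assumption: the window-event transport and the event name are illustrative,
and only the notifyVoiceDictationConfigChanged and useVoiceDictation names
come from the commits.

    // Hypothetical sketch (TypeScript) of the notify/subscribe pair the
    // patch assumes. Event name and window-event transport are guesses.
    const VOICE_DICTATION_CONFIG_CHANGED = "voice-dictation-config-changed";

    export function notifyVoiceDictationConfigChanged(): void {
      // Broadcast to every listener in this window (chat input, settings).
      window.dispatchEvent(new Event(VOICE_DICTATION_CONFIG_CHANGED));
    }

    export function onVoiceDictationConfigChanged(
      listener: () => void,
    ): () => void {
      window.addEventListener(VOICE_DICTATION_CONFIG_CHANGED, listener);
      // Return an unsubscribe function, convenient as a useEffect cleanup.
      return () =>
        window.removeEventListener(VOICE_DICTATION_CONFIG_CHANGED, listener);
    }

Under that contract the fix is mechanical: the local-model download/delete
path and the cloud-provider model-change path both end in the same notifier,
so the chat page's mic button re-enables without a window reload.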
From 0a8fbe3722d729a8980facbf15254a3a0af41136 Mon Sep 17 00:00:00 2001
From: tulsi
Date: Thu, 16 Apr 2026 12:35:42 -0700
Subject: [PATCH 09/30] fix(goose2): one-click send while mic is still
 recording
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ChatInput's handleSend used to early-return when isRecording or
isTranscribing, which meant clicking Send during active dictation only
stopped the mic — you had to click Send a second time to actually send.

Remove the early return. If recording is still live, stop it with
flushPending:false and send whatever's already transcribed into the
textarea. Any in-flight audio the user spoke AFTER clicking Send is
intentionally dropped — by the time the user clicks Send, what's in the
textarea is what they want to send.

Empty-send is still blocked by the canSend guard, so an accidental Send
with no transcription is a no-op.
---
 ui/goose2/src/features/chat/ui/ChatInput.tsx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ui/goose2/src/features/chat/ui/ChatInput.tsx b/ui/goose2/src/features/chat/ui/ChatInput.tsx
index 8013c6f7eb02..e6de03928207 100644
--- a/ui/goose2/src/features/chat/ui/ChatInput.tsx
+++ b/ui/goose2/src/features/chat/ui/ChatInput.tsx
@@ -195,12 +195,12 @@ export function ChatInput({
   useEffect(() => textareaRef.current?.focus(), []);

   const handleSend = useCallback(() => {
-    // If recording, stop and flush — the transcription callback will
-    // append text and may auto-submit. Don't send the current text yet
-    // because the final transcription hasn't arrived.
+    // If recording, stop without waiting for final flush and send what's
+    // already transcribed into the textarea. This makes Send a single click
+    // even while the mic is hot; any in-flight audio after the user clicked
+    // Send is intentionally dropped.
     if (dictation.isRecording || dictation.isTranscribing) {
-      dictation.stopRecording();
-      return;
+      dictation.stopRecording({ flushPending: false });
     }

     if (!canSend) {

From 48009ab1e6eeabf1469036cc02a9a4578cb9a2e5 Mon Sep 17 00:00:00 2001
From: tulsi
Date: Thu, 16 Apr 2026 12:39:24 -0700
Subject: [PATCH 10/30] fix(goose2): microphone UX in Voice settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes:

1. useAudioDevices now subscribes to navigator.permissions.query for
   'microphone' and reflects the live OS-level permission state. Before,
   hasPermission only became true when the user clicked 'Grant access'
   from this component — if they'd already granted mic permission via
   the chat input's getUserMedia call, Voice settings still showed the
   Grant access button with no effect. Now opening Voice settings shows
   the correct state immediately and updates reactively if permission
   changes elsewhere.

2. Move the Microphone block above the per-provider (API key / model)
   config block so its visual position reflects what it is: a voice-level
   setting that applies regardless of selected provider, not a
   provider-specific detail.
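Fix 1 relies on the Permissions API. The hook's new internals are not
reproduced in this excerpt, so the following is only a sketch under
assumptions: watchMicrophonePermission and its onChange callback are
illustrative names, not the actual useAudioDevices API. Two real constraints
it encodes: TypeScript's PermissionName union does not yet include
"microphone", so a cast is required, and permissions.query can reject in
environments that do not expose it, in which case the pre-existing
getUserMedia-driven behavior remains the fallback.

    // Sketch (TypeScript): mirror the live OS-level microphone permission.
    export function watchMicrophonePermission(
      onChange: (state: PermissionState) => void,
    ): () => void {
      let status: PermissionStatus | undefined;
      const handle = () => {
        if (status) onChange(status.state);
      };

      navigator.permissions
        .query({ name: "microphone" as PermissionName }) // cast: not in lib.dom yet
        .then((s) => {
          status = s;
          onChange(s.state); // show the correct state immediately on open
          s.addEventListener("change", handle); // update if it changes elsewhere
        })
        .catch(() => {
          // Permissions API unavailable: keep the legacy behavior where
          // permission is only learned from a successful getUserMedia call.
        });

      return () => status?.removeEventListener("change", handle);
    }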
---
 .../settings/ui/VoiceInputSettings.tsx       | 126 +++++++++---------
 .../shared/ui/ai-elements/mic-selector.tsx   |  31 ++++-
 2 files changed, 93 insertions(+), 64 deletions(-)

diff --git a/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx
index 399897a9683d..696d3ae162b4 100644
--- a/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx
+++ b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx
@@ -253,6 +253,69 @@ export function VoiceInputSettings() {
+      <div ...>
+        <div ...>
+          <div ...>
+            <Label ...>
+              {t("general.voiceInput.microphoneLabel")}
+            </Label>
+            <p ...>
+              {isMicrophoneSupported
+                ? t("general.voiceInput.microphoneDescription")
+                : t("general.voiceInput.microphoneUnavailable")}
+            </p>
+          </div>
+          {isMicrophoneSupported && !hasPermission ? (
+            <Button ...>...</Button>
+          ) : null}
+        </div>

+        {!devicesError &&
+        !hasPermission &&
+        permissionStatus === "not_determined" ? (
+          <p ...>
+            {t("general.voiceInput.microphoneAccessPrompt")}
+          </p>
+        ) : null}

+        {devicesError ? (
+          <p ...>{devicesError}</p>
+        ) : null}

+        {isMicrophoneSupported && hasPermission ? (
+          <MicSelector ... />
+        ) : null}
+      </div>
+
       {selectedStatus ? (
         <>
           {!selectedStatus.usesProviderConfig &&
@@ -377,69 +377,6 @@ export function VoiceInputSettings() {
       ) : null}
-
-      <div ...>
-        <div ...>
-          <div ...>
-            <Label ...>
-              {t("general.voiceInput.microphoneLabel")}
-            </Label>
-            <p ...>
-              {isMicrophoneSupported
-                ? t("general.voiceInput.microphoneDescription")
-                : t("general.voiceInput.microphoneUnavailable")}
-            </p>
-          </div>
-          {isMicrophoneSupported && !hasPermission ? (
-            <Button ...>...</Button>
-          ) : null}
-        </div>

-        {!devicesError &&
-        !hasPermission &&
-        permissionStatus === "not_determined" ? (
-          <p ...>
-            {t("general.voiceInput.microphoneAccessPrompt")}
-          </p>
-        ) : null}

-        {devicesError ? (
-          <p ...>{devicesError}</p>
-        ) : null}

-        {isMicrophoneSupported && hasPermission ? (
-          <MicSelector ... />
-        ) : null}
-      </div>