diff --git a/Cargo.lock b/Cargo.lock
index 093e9658825e..e38f0b0f0aa3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4454,6 +4454,7 @@ dependencies = [
  "async-stream",
  "async-trait",
  "axum",
+ "base64 0.22.1",
  "fs-err",
  "futures",
  "goose",
diff --git a/crates/goose-acp/Cargo.toml b/crates/goose-acp/Cargo.toml
index 8bc2b1e7eed5..a7200146b8a8 100644
--- a/crates/goose-acp/Cargo.toml
+++ b/crates/goose-acp/Cargo.toml
@@ -14,6 +14,7 @@ path = "src/bin/generate_acp_schema.rs"
 [features]
 default = ["code-mode", "rustls-tls"]
 code-mode = ["goose/code-mode"]
+local-inference = ["goose/local-inference"]
 rustls-tls = ["goose/rustls-tls", "goose-mcp/rustls-tls"]
 native-tls = ["goose/native-tls", "goose-mcp/native-tls"]
@@ -48,6 +49,7 @@ uuid = { workspace = true, features = ["v7"] }
 schemars = { workspace = true, features = ["derive"] }
 goose-acp-macros = { path = "../goose-acp-macros" }
 goose-sdk = { path = "../goose-sdk" }
+base64 = { workspace = true }
 
 [dev-dependencies]
 async-trait = { workspace = true }
diff --git a/crates/goose-acp/acp-meta.json b/crates/goose-acp/acp-meta.json
index 944d227b663f..75f28ef60a98 100644
--- a/crates/goose-acp/acp-meta.json
+++ b/crates/goose-acp/acp-meta.json
@@ -104,6 +104,46 @@
       "method": "_goose/session/unarchive",
       "requestType": "UnarchiveSessionRequest",
       "responseType": "EmptyResponse"
+    },
+    {
+      "method": "_goose/dictation/transcribe",
+      "requestType": "DictationTranscribeRequest",
+      "responseType": "DictationTranscribeResponse"
+    },
+    {
+      "method": "_goose/dictation/config",
+      "requestType": "DictationConfigRequest",
+      "responseType": "DictationConfigResponse"
+    },
+    {
+      "method": "_goose/dictation/models/list",
+      "requestType": "DictationModelsListRequest",
+      "responseType": "DictationModelsListResponse"
+    },
+    {
+      "method": "_goose/dictation/models/download",
+      "requestType": "DictationModelDownloadRequest",
+      "responseType": "EmptyResponse"
+    },
+    {
+      "method": "_goose/dictation/models/download/progress",
+      "requestType": "DictationModelDownloadProgressRequest",
+      "responseType": "DictationModelDownloadProgressResponse"
+    },
+    {
+      "method": "_goose/dictation/models/cancel",
+      "requestType": "DictationModelCancelRequest",
+      "responseType": "EmptyResponse"
+    },
+    {
+      "method": "_goose/dictation/models/delete",
+      "requestType": "DictationModelDeleteRequest",
+      "responseType": "EmptyResponse"
+    },
+    {
+      "method": "_goose/dictation/model/select",
+      "requestType": "DictationModelSelectRequest",
+      "responseType": "EmptyResponse"
+    }
   ]
 }
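The eight `_goose/dictation/*` entries above are plain JSON-RPC request/response pairs, so any ACP client can call them directly. A minimal TypeScript sketch of the transcribe round-trip; the `Rpc` interface is an illustrative stand-in for whatever transport the client already has, while the method name and the `audio`/`mimeType`/`provider` fields come from the schema below:

// Hypothetical JSON-RPC plumbing; only the method name and the params
// shape are defined by this change.
interface Rpc {
  sendRequest(method: string, params: unknown): Promise<unknown>;
}

async function transcribeClip(rpc: Rpc, wav: Uint8Array): Promise<string> {
  // Base64-encode the raw bytes, chunked so String.fromCharCode never
  // sees an argument list longer than 32k entries.
  let binary = "";
  for (let i = 0; i < wav.length; i += 0x8000) {
    binary += String.fromCharCode(...wav.subarray(i, i + 0x8000));
  }
  const result = (await rpc.sendRequest("_goose/dictation/transcribe", {
    audio: btoa(binary),
    mimeType: "audio/wav",
    provider: "openai",
  })) as { text: string };
  return result.text;
}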
\"audio/wav\", \"audio/webm\")" + }, + "provider": { + "type": "string", + "description": "Provider to use: \"openai\", \"groq\", \"elevenlabs\", or \"local\"" + } + }, + "required": [ + "audio", + "mimeType", + "provider" + ], + "description": "Transcribe audio via a dictation provider.", + "x-side": "agent", + "x-method": "_goose/dictation/transcribe" + }, + "DictationTranscribeResponse": { + "type": "object", + "properties": { + "text": { + "type": "string" + } + }, + "required": [ + "text" + ], + "description": "Transcription result.", + "x-side": "agent", + "x-method": "_goose/dictation/transcribe" + }, + "DictationConfigRequest": { + "type": "object", + "description": "Get the configuration status of all dictation providers.", + "x-side": "agent", + "x-method": "_goose/dictation/config" + }, + "DictationConfigResponse": { + "type": "object", + "properties": { + "providers": { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/DictationProviderStatusEntry" + } + } + }, + "required": [ + "providers" + ], + "description": "Dictation config response — map of provider name to status.", + "x-side": "agent", + "x-method": "_goose/dictation/config" + }, + "DictationProviderStatusEntry": { + "type": "object", + "properties": { + "configured": { + "type": "boolean" + }, + "host": { + "type": [ + "string", + "null" + ] + }, + "description": { + "type": "string" + }, + "usesProviderConfig": { + "type": "boolean" + }, + "settingsPath": { + "type": [ + "string", + "null" + ] + }, + "configKey": { + "type": [ + "string", + "null" + ] + }, + "modelConfigKey": { + "type": [ + "string", + "null" + ] + }, + "defaultModel": { + "type": [ + "string", + "null" + ] + }, + "selectedModel": { + "type": [ + "string", + "null" + ] + }, + "availableModels": { + "type": "array", + "items": { + "$ref": "#/$defs/DictationModelOption" + }, + "default": [] + } + }, + "required": [ + "configured", + "description", + "usesProviderConfig" + ], + "description": "Per-provider configuration status." 
+    },
+    "DictationModelOption": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
+        },
+        "label": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "id",
+        "label",
+        "description"
+      ]
+    },
+    "DictationModelsListRequest": {
+      "type": "object",
+      "description": "List available local Whisper models with their download status.",
+      "x-side": "agent",
+      "x-method": "_goose/dictation/models/list"
+    },
+    "DictationModelsListResponse": {
+      "type": "object",
+      "properties": {
+        "models": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/DictationLocalModelStatus"
+          }
+        }
+      },
+      "required": [
+        "models"
+      ],
+      "x-side": "agent",
+      "x-method": "_goose/dictation/models/list"
+    },
+    "DictationLocalModelStatus": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
+        },
+        "label": {
+          "type": "string"
+        },
+        "description": {
+          "type": "string"
+        },
+        "sizeMb": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "downloaded": {
+          "type": "boolean"
+        },
+        "downloadInProgress": {
+          "type": "boolean"
+        }
+      },
+      "required": [
+        "id",
+        "label",
+        "description",
+        "sizeMb",
+        "downloaded",
+        "downloadInProgress"
+      ]
+    },
+    "DictationModelDownloadRequest": {
+      "type": "object",
+      "properties": {
+        "modelId": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "modelId"
+      ],
+      "description": "Kick off a background download of a local Whisper model.",
+      "x-side": "agent",
+      "x-method": "_goose/dictation/models/download"
+    },
+    "DictationModelDownloadProgressRequest": {
+      "type": "object",
+      "properties": {
+        "modelId": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "modelId"
+      ],
+      "description": "Poll the progress of an in-flight download.",
+      "x-side": "agent",
+      "x-method": "_goose/dictation/models/download/progress"
+    },
+    "DictationModelDownloadProgressResponse": {
+      "type": "object",
+      "properties": {
+        "progress": {
+          "anyOf": [
+            {
+              "$ref": "#/$defs/DictationDownloadProgress"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "description": "None when no download is active for this model id."
+        }
+      },
+      "x-side": "agent",
+      "x-method": "_goose/dictation/models/download/progress"
+    },
+    "DictationDownloadProgress": {
+      "type": "object",
+      "properties": {
+        "bytesDownloaded": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "totalBytes": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "progressPercent": {
+          "type": "number",
+          "format": "float"
+        },
+        "status": {
+          "type": "string",
+          "description": "serde lowercase of DownloadStatus: \"downloading\" | \"completed\" | \"failed\" | \"cancelled\""
+        },
+        "error": {
+          "type": [
+            "string",
+            "null"
+          ]
+        }
+      },
+      "required": [
+        "bytesDownloaded",
+        "totalBytes",
+        "progressPercent",
+        "status"
+      ]
+    },
+    "DictationModelCancelRequest": {
+      "type": "object",
+      "properties": {
+        "modelId": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "modelId"
+      ],
+      "description": "Cancel an in-flight download.",
+      "x-side": "agent",
+      "x-method": "_goose/dictation/models/cancel"
+    },
+    "DictationModelDeleteRequest": {
+      "type": "object",
+      "properties": {
+        "modelId": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "modelId"
+      ],
+      "description": "Delete a downloaded local Whisper model from disk.",
+      "x-side": "agent",
+      "x-method": "_goose/dictation/models/delete"
+    },
+    "DictationModelSelectRequest": {
+      "type": "object",
+      "properties": {
+        "provider": {
+          "type": "string"
+        },
+        "modelId": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "provider",
+        "modelId"
+      ],
+      "description": "Persist the user's model selection for a given provider.",
+      "x-side": "agent",
+      "x-method": "_goose/dictation/model/select"
+    },
     "ExtRequest": {
       "properties": {
         "id": {
@@ -807,6 +1130,78 @@
           ],
           "description": "Params for _goose/session/unarchive",
           "title": "UnarchiveSessionRequest"
+        },
+        {
+          "allOf": [
+            {
+              "$ref": "#/$defs/DictationTranscribeRequest"
+            }
+          ],
+          "description": "Params for _goose/dictation/transcribe",
+          "title": "DictationTranscribeRequest"
+        },
+        {
+          "allOf": [
+            {
+              "$ref": "#/$defs/DictationConfigRequest"
+            }
+          ],
+          "description": "Params for _goose/dictation/config",
+          "title": "DictationConfigRequest"
+        },
+        {
+          "allOf": [
+            {
+              "$ref": "#/$defs/DictationModelsListRequest"
+            }
+          ],
+          "description": "Params for _goose/dictation/models/list",
+          "title": "DictationModelsListRequest"
+        },
+        {
+          "allOf": [
+            {
+              "$ref": "#/$defs/DictationModelDownloadRequest"
+            }
+          ],
+          "description": "Params for _goose/dictation/models/download",
+          "title": "DictationModelDownloadRequest"
+        },
+        {
+          "allOf": [
+            {
+              "$ref": "#/$defs/DictationModelDownloadProgressRequest"
+            }
+          ],
+          "description": "Params for _goose/dictation/models/download/progress",
+          "title": "DictationModelDownloadProgressRequest"
+        },
+        {
+          "allOf": [
+            {
+              "$ref": "#/$defs/DictationModelCancelRequest"
+            }
+          ],
+          "description": "Params for _goose/dictation/models/cancel",
+          "title": "DictationModelCancelRequest"
+        },
+        {
+          "allOf": [
+            {
+              "$ref": "#/$defs/DictationModelDeleteRequest"
+            }
+          ],
+          "description": "Params for _goose/dictation/models/delete",
+          "title": "DictationModelDeleteRequest"
+        },
+        {
+          "allOf": [
+            {
+              "$ref": "#/$defs/DictationModelSelectRequest"
+            }
+          ],
+          "description": "Params for _goose/dictation/model/select",
+          "title": "DictationModelSelectRequest"
+        }
       ]
     },
@@ -933,6 +1328,38 @@
         }
       ],
       "title": "ImportSessionResponse"
+    },
+    {
+      "allOf": [
+        {
+          "$ref": "#/$defs/DictationTranscribeResponse"
+        }
+      ],
+      "title": "DictationTranscribeResponse"
+    },
+    {
+      "allOf": [
+        {
+          "$ref": "#/$defs/DictationConfigResponse"
+        }
+      ],
+      "title": "DictationConfigResponse"
+    },
+    {
+      "allOf": [
+        {
+          "$ref": "#/$defs/DictationModelsListResponse"
+        }
+      ],
+      "title": "DictationModelsListResponse"
+    },
+    {
+      "allOf": [
+        {
+          "$ref": "#/$defs/DictationModelDownloadProgressResponse"
+        }
+      ],
+      "title": "DictationModelDownloadProgressResponse"
+    }
   ]
 },
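Downloads are fire-and-forget plus polling: `models/download` returns as soon as the background task is queued, and `models/download/progress` reports `null` progress once nothing is in flight. A sketch of the polling side, reusing the illustrative `Rpc` stand-in from above; the status strings are the ones listed in `DictationDownloadProgress`:

async function waitForModel(rpc: Rpc, modelId: string): Promise<void> {
  await rpc.sendRequest("_goose/dictation/models/download", { modelId });
  for (;;) {
    const { progress } = (await rpc.sendRequest(
      "_goose/dictation/models/download/progress",
      { modelId },
    )) as {
      progress: { status: string; error?: string | null } | null;
    };
    // null means no download is active for this model id anymore.
    if (!progress || progress.status === "completed") return;
    if (progress.status === "failed" || progress.status === "cancelled") {
      throw new Error(progress.error ?? `download ${progress.status}`);
    }
    await new Promise((r) => setTimeout(r, 500)); // still "downloading"
  }
}

A real client would also tolerate a brief `null` window right after kick-off, before the download manager registers the task.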
"allOf": [ + { + "$ref": "#/$defs/DictationModelsListResponse" + } + ], + "title": "DictationModelsListResponse" + }, + { + "allOf": [ + { + "$ref": "#/$defs/DictationModelDownloadProgressResponse" + } + ], + "title": "DictationModelDownloadProgressResponse" } ] }, diff --git a/crates/goose-acp/src/server.rs b/crates/goose-acp/src/server.rs index d1a8212c7507..ca7c0f7ff883 100644 --- a/crates/goose-acp/src/server.rs +++ b/crates/goose-acp/src/server.rs @@ -16,6 +16,13 @@ use goose::config::paths::Paths; use goose::config::permission::PermissionManager; use goose::config::{Config, GooseMode}; use goose::conversation::message::{ActionRequiredData, Message, MessageContent}; +#[cfg(feature = "local-inference")] +use goose::dictation::providers::transcribe_local; +use goose::dictation::providers::{ + all_providers, is_configured, transcribe_with_provider, DictationProvider, +}; +#[cfg(feature = "local-inference")] +use goose::dictation::whisper; use goose::mcp_utils::ToolResult; use goose::permission::permission_confirmation::PrincipalType; use goose::permission::{Permission, PermissionConfirmation}; @@ -68,6 +75,12 @@ pub type AcpProviderFactory = Arc< const DEFAULT_PROVIDER_ID: &str = "goose"; const DEFAULT_PROVIDER_LABEL: &str = "Goose (Default)"; +const OPENAI_TRANSCRIPTION_MODEL_CONFIG_KEY: &str = "OPENAI_TRANSCRIPTION_MODEL"; +const GROQ_TRANSCRIPTION_MODEL_CONFIG_KEY: &str = "GROQ_TRANSCRIPTION_MODEL"; +const ELEVENLABS_TRANSCRIPTION_MODEL_CONFIG_KEY: &str = "ELEVENLABS_TRANSCRIPTION_MODEL"; +const OPENAI_TRANSCRIPTION_MODEL: &str = "whisper-1"; +const GROQ_TRANSCRIPTION_MODEL: &str = "whisper-large-v3-turbo"; +const ELEVENLABS_TRANSCRIPTION_MODEL: &str = "scribe_v1"; /// In-memory state for an active ACP session. /// @@ -2904,6 +2917,420 @@ impl GooseAcpAgent { .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?; Ok(EmptyResponse {}) } + + #[custom_method(DictationTranscribeRequest)] + async fn on_dictation_transcribe( + &self, + req: DictationTranscribeRequest, + ) -> Result { + use base64::{engine::general_purpose::STANDARD as BASE64, Engine}; + let config = goose::config::Config::global(); + + #[cfg(not(feature = "local-inference"))] + if req.provider == "local" { + return Err(sacp::Error::invalid_params() + .data("Local inference is not available in this build")); + } + + let provider: DictationProvider = serde_json::from_value(serde_json::Value::String( + req.provider.clone(), + )) + .map_err(|_| { + sacp::Error::invalid_params().data(format!("Unknown provider: {}", req.provider)) + })?; + + let audio_bytes = BASE64 + .decode(&req.audio) + .map_err(|_| sacp::Error::invalid_params().data("Invalid base64 audio data"))?; + + if audio_bytes.len() > 50 * 1024 * 1024 { + return Err(sacp::Error::invalid_params().data("Audio too large (max 50MB)")); + } + + let extension = match req.mime_type.as_str() { + "audio/webm" | "audio/webm;codecs=opus" => "webm", + "audio/mp4" => "mp4", + "audio/mpeg" | "audio/mpga" => "mp3", + "audio/m4a" => "m4a", + "audio/wav" | "audio/x-wav" => "wav", + other => { + return Err( + sacp::Error::invalid_params().data(format!("Unsupported format: {other}")) + ) + } + }; + + let text = match provider { + DictationProvider::OpenAI => { + let model = dictation_selected_model(config, DictationProvider::OpenAI) + .unwrap_or_else(|| OPENAI_TRANSCRIPTION_MODEL.to_string()); + transcribe_with_provider( + DictationProvider::OpenAI, + "model".to_string(), + model, + audio_bytes, + extension, + &req.mime_type, + ) + .await + } + DictationProvider::Groq => { + 
+                let model = dictation_selected_model(config, DictationProvider::Groq)
+                    .unwrap_or_else(|| GROQ_TRANSCRIPTION_MODEL.to_string());
+                transcribe_with_provider(
+                    DictationProvider::Groq,
+                    "model".to_string(),
+                    model,
+                    audio_bytes,
+                    extension,
+                    &req.mime_type,
+                )
+                .await
+            }
+            DictationProvider::ElevenLabs => {
+                let model = dictation_selected_model(config, DictationProvider::ElevenLabs)
+                    .unwrap_or_else(|| ELEVENLABS_TRANSCRIPTION_MODEL.to_string());
+                transcribe_with_provider(
+                    DictationProvider::ElevenLabs,
+                    "model_id".to_string(),
+                    model,
+                    audio_bytes,
+                    extension,
+                    &req.mime_type,
+                )
+                .await
+            }
+            #[cfg(feature = "local-inference")]
+            DictationProvider::Local => transcribe_local(audio_bytes).await,
+        }
+        .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?;
+
+        Ok(DictationTranscribeResponse { text })
+    }
+
+    #[custom_method(DictationConfigRequest)]
+    async fn on_dictation_config(
+        &self,
+        _req: DictationConfigRequest,
+    ) -> Result<DictationConfigResponse, sacp::Error> {
+        let config = goose::config::Config::global();
+        let mut providers = std::collections::HashMap::new();
+
+        for def in all_providers() {
+            let provider = def.provider;
+            let host = if let Some(host_key) = def.host_key {
+                config
+                    .get(host_key, false)
+                    .ok()
+                    .and_then(|v| v.as_str().map(|s| s.to_string()))
+            } else {
+                None
+            };
+
+            let provider_key = serde_json::to_value(provider)
+                .ok()
+                .and_then(|v| v.as_str().map(|s| s.to_string()))
+                .unwrap_or_else(|| format!("{:?}", provider).to_lowercase());
+            providers.insert(
+                provider_key,
+                DictationProviderStatusEntry {
+                    configured: is_configured(provider),
+                    host,
+                    description: def.description.to_string(),
+                    uses_provider_config: def.uses_provider_config,
+                    settings_path: def.settings_path.map(|s| s.to_string()),
+                    config_key: if !def.uses_provider_config {
+                        Some(def.config_key.to_string())
+                    } else {
+                        None
+                    },
+                    model_config_key: dictation_model_config_key(provider),
+                    default_model: dictation_default_model(provider),
+                    selected_model: dictation_selected_model(config, provider),
+                    available_models: dictation_available_models(provider),
+                },
+            );
+        }
+
+        Ok(DictationConfigResponse { providers })
+    }
+
+    #[custom_method(DictationModelsListRequest)]
+    async fn on_dictation_models_list(
+        &self,
+        _req: DictationModelsListRequest,
+    ) -> Result<DictationModelsListResponse, sacp::Error> {
+        #[cfg(feature = "local-inference")]
+        {
+            use goose::download_manager::{get_download_manager, DownloadStatus};
+
+            let manager = get_download_manager();
+            let models = whisper::available_models()
+                .iter()
+                .map(|model| DictationLocalModelStatus {
+                    id: model.id.to_string(),
+                    label: model.id.to_string(),
+                    description: model.description.to_string(),
+                    size_mb: model.size_mb,
+                    downloaded: model.is_downloaded(),
+                    download_in_progress: manager
+                        .get_progress(model.id)
+                        .map(|progress| progress.status == DownloadStatus::Downloading)
+                        .unwrap_or(false),
+                })
+                .collect();
+
+            Ok(DictationModelsListResponse { models })
+        }
+
+        #[cfg(not(feature = "local-inference"))]
+        Ok(DictationModelsListResponse::default())
+    }
+
+    #[custom_method(DictationModelDownloadRequest)]
+    async fn on_dictation_model_download(
+        &self,
+        _req: DictationModelDownloadRequest,
+    ) -> Result<EmptyResponse, sacp::Error> {
+        #[cfg(feature = "local-inference")]
+        {
+            use goose::download_manager::get_download_manager;
+
+            let model = whisper::get_model(&_req.model_id)
+                .ok_or_else(|| sacp::Error::invalid_params().data("Unknown model id"))?;
+            let manager = get_download_manager();
+            let model_id_for_config = model.id.to_string();
+
+            manager
+                .download_model(
+                    model.id.to_string(),
+                    model.url.to_string(),
+                    model.local_path(),
+                    Some(Box::new(move || {
+                        let config = goose::config::Config::global();
+                        // Only auto-select this model if the user has no model
+                        // currently selected. This prevents silently switching
+                        // the active model mid-session when a user downloads an
+                        // additional model while one is already in use.
+                        let already_selected = config
+                            .get(whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY, false)
+                            .ok()
+                            .and_then(|value| value.as_str().map(str::to_owned))
+                            .filter(|model_id| {
+                                // Treat a deleted model file as no active selection
+                                // so a fresh download can auto-select cleanly.
+                                whisper::get_model(model_id)
+                                    .is_some_and(|model| model.is_downloaded())
+                            });
+                        if already_selected.is_none() {
+                            if let Err(e) = config.set_param(
+                                whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY,
+                                model_id_for_config.clone(),
+                            ) {
+                                error!("Failed to save LOCAL_WHISPER_MODEL after download: {}", e);
+                            }
+                        }
+                    })),
+                )
+                .await
+                .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?;
+
+            Ok(EmptyResponse {})
+        }
+
+        #[cfg(not(feature = "local-inference"))]
+        Err(sacp::Error::invalid_params().data("Local inference not enabled"))
+    }
+
+    #[custom_method(DictationModelDownloadProgressRequest)]
+    async fn on_dictation_model_download_progress(
+        &self,
+        _req: DictationModelDownloadProgressRequest,
+    ) -> Result<DictationModelDownloadProgressResponse, sacp::Error> {
+        #[cfg(feature = "local-inference")]
+        {
+            use goose::download_manager::get_download_manager;
+
+            let manager = get_download_manager();
+            let progress =
+                manager
+                    .get_progress(&_req.model_id)
+                    .map(|progress| DictationDownloadProgress {
+                        bytes_downloaded: progress.bytes_downloaded,
+                        total_bytes: progress.total_bytes,
+                        progress_percent: progress.progress_percent,
+                        status: serde_json::to_value(&progress.status)
+                            .ok()
+                            .and_then(|value| value.as_str().map(ToOwned::to_owned))
+                            .unwrap_or_else(|| "unknown".to_string()),
+                        error: progress.error,
+                    });
+
+            Ok(DictationModelDownloadProgressResponse { progress })
+        }
+
+        #[cfg(not(feature = "local-inference"))]
+        Ok(DictationModelDownloadProgressResponse { progress: None })
+    }
+
+    #[custom_method(DictationModelCancelRequest)]
+    async fn on_dictation_model_cancel(
+        &self,
+        _req: DictationModelCancelRequest,
+    ) -> Result<EmptyResponse, sacp::Error> {
+        #[cfg(feature = "local-inference")]
+        {
+            use goose::download_manager::get_download_manager;
+
+            let manager = get_download_manager();
+            manager
+                .cancel_download(&_req.model_id)
+                .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?;
+
+            Ok(EmptyResponse {})
+        }
+
+        #[cfg(not(feature = "local-inference"))]
+        Err(sacp::Error::invalid_params().data("Local inference not enabled"))
+    }
+
+    #[custom_method(DictationModelDeleteRequest)]
+    async fn on_dictation_model_delete(
+        &self,
+        _req: DictationModelDeleteRequest,
+    ) -> Result<EmptyResponse, sacp::Error> {
+        #[cfg(feature = "local-inference")]
+        {
+            let model = whisper::get_model(&_req.model_id)
+                .ok_or_else(|| sacp::Error::invalid_params().data("Unknown model id"))?;
+            let path = model.local_path();
+
+            if !path.exists() {
+                return Err(sacp::Error::invalid_params().data("Model not downloaded"));
+            }
+
+            std::fs::remove_file(path)
+                .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?;
+
+            Ok(EmptyResponse {})
+        }
+
+        #[cfg(not(feature = "local-inference"))]
+        Err(sacp::Error::invalid_params().data("Local inference not enabled"))
+    }
+
+    #[custom_method(DictationModelSelectRequest)]
+    async fn on_dictation_model_select(
+        &self,
+        req: DictationModelSelectRequest,
+    ) -> Result<EmptyResponse, sacp::Error> {
+        #[cfg(not(feature = "local-inference"))]
req.provider == "local" { + return Err(sacp::Error::invalid_params().data("Local inference not enabled")); + } + + let provider: DictationProvider = serde_json::from_value(serde_json::Value::String( + req.provider.clone(), + )) + .map_err(|_| { + sacp::Error::invalid_params().data(format!("Unknown provider: {}", req.provider)) + })?; + + let key = match provider { + DictationProvider::OpenAI => OPENAI_TRANSCRIPTION_MODEL_CONFIG_KEY, + DictationProvider::Groq => GROQ_TRANSCRIPTION_MODEL_CONFIG_KEY, + DictationProvider::ElevenLabs => ELEVENLABS_TRANSCRIPTION_MODEL_CONFIG_KEY, + #[cfg(feature = "local-inference")] + DictationProvider::Local => { + let model = whisper::get_model(&req.model_id) + .ok_or_else(|| sacp::Error::invalid_params().data("Unknown model id"))?; + if !model.is_downloaded() { + return Err( + sacp::Error::invalid_params().data("Local Whisper model is not downloaded") + ); + } + whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY + } + }; + + goose::config::Config::global() + .set_param(key, req.model_id) + .map_err(|e| sacp::Error::internal_error().data(e.to_string()))?; + + Ok(EmptyResponse {}) + } +} + +fn dictation_model_config_key(provider: DictationProvider) -> Option { + match provider { + DictationProvider::OpenAI => Some(OPENAI_TRANSCRIPTION_MODEL_CONFIG_KEY.to_string()), + DictationProvider::Groq => Some(GROQ_TRANSCRIPTION_MODEL_CONFIG_KEY.to_string()), + DictationProvider::ElevenLabs => { + Some(ELEVENLABS_TRANSCRIPTION_MODEL_CONFIG_KEY.to_string()) + } + #[cfg(feature = "local-inference")] + DictationProvider::Local => Some(whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY.to_string()), + } +} + +fn dictation_default_model(provider: DictationProvider) -> Option { + match provider { + DictationProvider::OpenAI => Some(OPENAI_TRANSCRIPTION_MODEL.to_string()), + DictationProvider::Groq => Some(GROQ_TRANSCRIPTION_MODEL.to_string()), + DictationProvider::ElevenLabs => Some(ELEVENLABS_TRANSCRIPTION_MODEL.to_string()), + #[cfg(feature = "local-inference")] + DictationProvider::Local => Some(whisper::recommend_model().to_string()), + } +} + +fn dictation_selected_model(config: &Config, provider: DictationProvider) -> Option { + #[cfg(feature = "local-inference")] + if provider == DictationProvider::Local { + return config + .get(whisper::LOCAL_WHISPER_MODEL_CONFIG_KEY, false) + .ok() + .and_then(|value| value.as_str().map(str::to_owned)) + .filter(|model_id| whisper::get_model(model_id).is_some()) + .or_else(|| dictation_default_model(provider)); + } + + dictation_model_config_key(provider) + .and_then(|key| { + config + .get(&key, false) + .ok() + .and_then(|value| value.as_str().map(str::to_owned)) + }) + .or_else(|| dictation_default_model(provider)) +} + +fn dictation_available_models(provider: DictationProvider) -> Vec { + match provider { + DictationProvider::OpenAI => vec![DictationModelOption { + id: OPENAI_TRANSCRIPTION_MODEL.to_string(), + label: "Whisper-1".to_string(), + description: "OpenAI's hosted Whisper transcription model.".to_string(), + }], + DictationProvider::Groq => vec![DictationModelOption { + id: GROQ_TRANSCRIPTION_MODEL.to_string(), + label: "Whisper Large V3 Turbo".to_string(), + description: "Groq's fast hosted Whisper transcription model.".to_string(), + }], + DictationProvider::ElevenLabs => vec![DictationModelOption { + id: ELEVENLABS_TRANSCRIPTION_MODEL.to_string(), + label: "Scribe v1".to_string(), + description: "ElevenLabs' hosted speech-to-text model.".to_string(), + }], + #[cfg(feature = "local-inference")] + DictationProvider::Local => 
diff --git a/crates/goose-cli/Cargo.toml b/crates/goose-cli/Cargo.toml
index 6c20a644912a..369cd59606cb 100644
--- a/crates/goose-cli/Cargo.toml
+++ b/crates/goose-cli/Cargo.toml
@@ -71,7 +71,7 @@ winapi = { workspace = true }
 [features]
 default = ["code-mode", "local-inference", "aws-providers", "telemetry", "otel", "rustls-tls"]
 code-mode = ["goose/code-mode", "goose-acp/code-mode"]
-local-inference = ["goose/local-inference"]
+local-inference = ["goose/local-inference", "goose-acp/local-inference"]
 aws-providers = ["goose/aws-providers"]
 cuda = ["goose/cuda", "local-inference"]
 telemetry = ["goose/telemetry"]
diff --git a/crates/goose-sdk/src/custom_requests.rs b/crates/goose-sdk/src/custom_requests.rs
index bbc375be09f3..609299712c47 100644
--- a/crates/goose-sdk/src/custom_requests.rs
+++ b/crates/goose-sdk/src/custom_requests.rs
@@ -1,6 +1,7 @@
 use sacp::{JsonRpcRequest, JsonRpcResponse};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
 
 /// Schema descriptor for a single custom method, produced by the
 /// `#[custom_methods]` macro's generated `custom_method_schemas()` function.
@@ -309,6 +310,154 @@ pub struct ProviderConfigKey {
     pub primary: bool,
 }
 
+/// Transcribe audio via a dictation provider.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/transcribe", response = DictationTranscribeResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationTranscribeRequest {
+    /// Base64-encoded audio data
+    pub audio: String,
+    /// MIME type (e.g. "audio/wav", "audio/webm")
+    pub mime_type: String,
+    /// Provider to use: "openai", "groq", "elevenlabs", or "local"
+    pub provider: String,
+}
+
+/// Transcription result.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)]
+pub struct DictationTranscribeResponse {
+    pub text: String,
+}
+
+/// Get the configuration status of all dictation providers.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/config", response = DictationConfigResponse)]
+pub struct DictationConfigRequest {}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct DictationModelOption {
+    pub id: String,
+    pub label: String,
+    pub description: String,
+}
+
+/// Per-provider configuration status.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationProviderStatusEntry {
+    pub configured: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub host: Option<String>,
+    pub description: String,
+    pub uses_provider_config: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub settings_path: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub config_key: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model_config_key: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub default_model: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub selected_model: Option<String>,
+    #[serde(default)]
+    pub available_models: Vec<DictationModelOption>,
+}
+
+/// Dictation config response — map of provider name to status.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)]
+pub struct DictationConfigResponse {
+    pub providers: HashMap<String, DictationProviderStatusEntry>,
+}
+
 /// Empty success response for operations that return no data.
 #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)]
 pub struct EmptyResponse {}
+
+/// List available local Whisper models with their download status.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(
+    method = "_goose/dictation/models/list",
+    response = DictationModelsListResponse
+)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelsListRequest {}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelsListResponse {
+    pub models: Vec<DictationLocalModelStatus>,
+}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationLocalModelStatus {
+    pub id: String,
+    pub label: String,
+    pub description: String,
+    pub size_mb: u32,
+    pub downloaded: bool,
+    pub download_in_progress: bool,
+}
+
+/// Kick off a background download of a local Whisper model.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/models/download", response = EmptyResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelDownloadRequest {
+    pub model_id: String,
+}
+
+/// Poll the progress of an in-flight download.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(
+    method = "_goose/dictation/models/download/progress",
+    response = DictationModelDownloadProgressResponse
+)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelDownloadProgressRequest {
+    pub model_id: String,
+}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelDownloadProgressResponse {
+    /// None when no download is active for this model id.
+    pub progress: Option<DictationDownloadProgress>,
+}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationDownloadProgress {
+    pub bytes_downloaded: u64,
+    pub total_bytes: u64,
+    pub progress_percent: f32,
+    /// serde lowercase of DownloadStatus: "downloading" | "completed" | "failed" | "cancelled"
+    pub status: String,
+    pub error: Option<String>,
+}
+
+/// Cancel an in-flight download.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/models/cancel", response = EmptyResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelCancelRequest {
+    pub model_id: String,
+}
+
+/// Delete a downloaded local Whisper model from disk.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/models/delete", response = EmptyResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelDeleteRequest {
+    pub model_id: String,
+}
+
+/// Persist the user's model selection for a given provider.
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, JsonRpcRequest)]
+#[request(method = "_goose/dictation/model/select", response = EmptyResponse)]
+#[serde(rename_all = "camelCase")]
+pub struct DictationModelSelectRequest {
+    pub provider: String,
+    pub model_id: String,
+}
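All of these structs carry `#[serde(rename_all = "camelCase")]`, so Rust's `model_id` travels as `modelId`. For reference, the matching TypeScript shape for the select call; the interface name and the example model id are illustrative, while the field names and method follow the definitions above:

interface DictationModelSelectParams {
  provider: "openai" | "groq" | "elevenlabs" | "local";
  modelId: string; // Rust field model_id, camelCased by serde
}

// e.g. persist a hypothetical local model choice:
const params: DictationModelSelectParams = {
  provider: "local",
  modelId: "some-downloaded-model-id",
};
// await rpc.sendRequest("_goose/dictation/model/select", params);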
diff --git a/ui/goose2/scripts/check-file-sizes.mjs b/ui/goose2/scripts/check-file-sizes.mjs
index 07e1d124f273..c5c47459595b 100644
--- a/ui/goose2/scripts/check-file-sizes.mjs
+++ b/ui/goose2/scripts/check-file-sizes.mjs
@@ -50,6 +50,16 @@ const EXCEPTIONS = {
     justification:
       "ACP-backed session overlay persistence, draft migration, and sidebar-facing session merge logic live together for now.",
   },
+  "src/features/chat/ui/ChatInput.tsx": {
+    limit: 510,
+    justification:
+      "Voice dictation send/stop guards, attachment handling, and mention/picker coordination still share one chat composer component.",
+  },
+  "src/features/chat/ui/__tests__/ChatInput.test.tsx": {
+    limit: 510,
+    justification:
+      "Composer regression coverage spans personas, queueing, attachments, and voice-input edge cases in one interaction-heavy suite.",
+  },
   "src-tauri/src/commands/projects.rs": {
     limit: 520,
     justification:
diff --git a/ui/goose2/src-tauri/Info.plist b/ui/goose2/src-tauri/Info.plist
new file mode 100644
index 000000000000..8588d2d741c4
--- /dev/null
+++ b/ui/goose2/src-tauri/Info.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>NSMicrophoneUsageDescription</key>
+  <string>Goose uses your microphone to capture voice input for dictation.</string>
+</dict>
+</plist>
diff --git a/ui/goose2/src-tauri/plugins/app-test-driver/src/lib.rs b/ui/goose2/src-tauri/plugins/app-test-driver/src/lib.rs
index 256b2c29e1f6..0d7c09998b63 100644
--- a/ui/goose2/src-tauri/plugins/app-test-driver/src/lib.rs
+++ b/ui/goose2/src-tauri/plugins/app-test-driver/src/lib.rs
@@ -2,7 +2,9 @@ use serde::{Deserialize, Serialize};
 use std::io::{BufRead, BufReader, Write};
 use std::net::TcpListener;
 use std::sync::Mutex;
-use tauri::{AppHandle, Manager, Runtime, WebviewWindow};
+use tauri::{AppHandle, Manager, Runtime};
+#[cfg(target_os = "macos")]
+use tauri::WebviewWindow;
 
 #[derive(Deserialize, Debug)]
 struct TestCommand {
diff --git a/ui/goose2/src-tauri/src/services/provider_defs.rs b/ui/goose2/src-tauri/src/services/provider_defs.rs
index 0a2a326eaf00..5eea0c0a5a64 100644
--- a/ui/goose2/src-tauri/src/services/provider_defs.rs
+++ b/ui/goose2/src-tauri/src/services/provider_defs.rs
@@ -125,6 +125,17 @@ pub(crate) static PROVIDER_CONFIG_DEFS: &[ProviderConfigDef] = &[
         keys: &[],
         oauth_cache_path: None,
     },
+    // Dictation providers (voice input)
+    ProviderConfigDef {
+        id: "dictation_groq",
+        keys: &[key("GROQ_API_KEY", true, true)],
+        oauth_cache_path: None,
+    },
+    ProviderConfigDef {
+        id: "dictation_elevenlabs",
+        keys: &[key("ELEVENLABS_API_KEY", true, true)],
+        oauth_cache_path: None,
+    },
 ];
 
 pub(crate) fn find_config_key(key_name: &str) -> Option<&'static ConfigKey> {
diff --git a/ui/goose2/src/features/chat/hooks/__tests__/useDictationRecorder.test.ts b/ui/goose2/src/features/chat/hooks/__tests__/useDictationRecorder.test.ts
new file mode 100644
index 000000000000..43b851d3ffc9
--- /dev/null
+++ b/ui/goose2/src/features/chat/hooks/__tests__/useDictationRecorder.test.ts
@@ -0,0 +1,72 @@
+import { act, renderHook, waitFor } from "@testing-library/react";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+const mockTranscribeDictation = vi.fn();
+
+vi.mock("@/shared/api/dictation", () => ({
+  transcribeDictation: (...args: unknown[]) => mockTranscribeDictation(...args),
+}));
+
+import { useDictationRecorder } from "../useDictationRecorder";
+
+function deferred<T>() {
+  let resolve!: (value: T | PromiseLike<T>) => void;
+  const promise = new Promise<T>((res) => {
+    resolve = res;
+  });
+  return { promise, resolve };
+}
+
+describe("useDictationRecorder", () => {
+  beforeEach(() => {
+    mockTranscribeDictation.mockReset();
+
+    Object.defineProperty(navigator, "mediaDevices", {
+      configurable: true,
+      value: {
+        getUserMedia: vi.fn(),
+      },
+    });
+  });
+
+  it("lets a second toggle cancel a pending startup", async () => {
+    const pendingStream = deferred<MediaStream>();
+    const stopTrack = vi.fn();
+    const stream = {
+      getTracks: () => [{ stop: stopTrack }],
+    } as unknown as MediaStream;
+
+    vi.mocked(navigator.mediaDevices.getUserMedia).mockReturnValue(
+      pendingStream.promise,
+    );
+
+    const { result } = renderHook(() =>
+      useDictationRecorder({
+        onError: vi.fn(),
+        onTranscription: vi.fn(),
+        preferredMicrophoneId: null,
+        provider: "openai",
+        providerConfigured: true,
+      }),
+    );
+
+    act(() => {
+      result.current.toggleRecording();
+    });
+
+    expect(result.current.isStarting()).toBe(true);
+
+    act(() => {
+      result.current.toggleRecording();
+    });
+
+    await act(async () => {
+      pendingStream.resolve(stream);
+      await pendingStream.promise;
+    });
+
+    await waitFor(() => expect(result.current.isStarting()).toBe(false));
+    expect(result.current.isRecording).toBe(false);
+    expect(stopTrack).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/ui/goose2/src/features/chat/hooks/__tests__/useVoiceDictation.test.ts b/ui/goose2/src/features/chat/hooks/__tests__/useVoiceDictation.test.ts
new file mode 100644
index 000000000000..a030d44b0f3e
--- /dev/null
+++ b/ui/goose2/src/features/chat/hooks/__tests__/useVoiceDictation.test.ts
@@ -0,0 +1,99 @@
+import { renderHook, waitFor } from "@testing-library/react";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+const mockGetDictationConfig = vi.fn();
+const mockUseDictationRecorder = vi.fn();
+const mockUseVoiceInputPreferences = vi.fn();
+
+vi.mock("@/shared/api/dictation", () => ({
+  getDictationConfig: () => mockGetDictationConfig(),
+}));
+
+vi.mock("../useDictationRecorder", () => ({
+  useDictationRecorder: (options: unknown) => mockUseDictationRecorder(options),
+}));
+
+vi.mock("../useVoiceInputPreferences", () => ({
+  useVoiceInputPreferences: () => mockUseVoiceInputPreferences(),
+}));
+
+import { useVoiceDictation } from "../useVoiceDictation";
+
+describe("useVoiceDictation", () => {
+  beforeEach(() => {
+    mockGetDictationConfig.mockReset();
+    mockUseDictationRecorder.mockReset();
+    mockUseVoiceInputPreferences.mockReset();
+
+    mockUseDictationRecorder.mockReturnValue({
+      isEnabled: false,
+      isRecording: false,
+      isStarting: () => false,
+      isTranscribing: false,
+      startRecording: vi.fn(),
+      stopRecording: vi.fn(),
+      toggleRecording: vi.fn(),
+    });
+  });
+
+  it("defers default provider fallback until preferences hydrate", async () => {
+    const voicePrefs = {
+      autoSubmitPhrases: [],
+      clearSelectedProvider: vi.fn(),
+      hasStoredProviderPreference: false,
+      isHydrated: false,
+      preferredMicrophoneId: null,
+      rawAutoSubmitPhrases: "submit",
+      selectedProvider: null,
+      setPreferredMicrophoneId: vi.fn(),
+      setRawAutoSubmitPhrases: vi.fn(),
+      setSelectedProvider: vi.fn(),
+    };
+
+    mockUseVoiceInputPreferences.mockImplementation(() => voicePrefs);
+    mockGetDictationConfig.mockResolvedValue({
+      openai: {
+        availableModels: [],
+        configured: true,
+        description: "OpenAI",
+        usesProviderConfig: true,
+      },
+    });
+
+    const { rerender } = renderHook(() =>
+      useVoiceDictation({
+        attachments: [],
+        clearAttachments: vi.fn(),
+        onSend: vi.fn(),
+        resetTextarea: vi.fn(),
+        selectedPersonaId: null,
+        setText: vi.fn(),
+        text: "",
+      }),
+    );
+
+    await waitFor(() =>
+      expect(mockGetDictationConfig).toHaveBeenCalledTimes(1),
+    );
+    await waitFor(() =>
+      expect(mockUseDictationRecorder).toHaveBeenLastCalledWith(
+        expect.objectContaining({
+          provider: null,
+          providerConfigured: false,
+        }),
+      ),
+    );
+
+    voicePrefs.isHydrated = true;
+    rerender();
+
+    await waitFor(() =>
+      expect(mockUseDictationRecorder).toHaveBeenLastCalledWith(
+        expect.objectContaining({
+          provider: "openai",
+          providerConfigured: true,
+        }),
+      ),
+    );
+  });
+});
diff --git a/ui/goose2/src/features/chat/hooks/__tests__/useVoiceInputPreferences.test.ts b/ui/goose2/src/features/chat/hooks/__tests__/useVoiceInputPreferences.test.ts
new file mode 100644
index 000000000000..8878ac1195aa
--- /dev/null
+++ b/ui/goose2/src/features/chat/hooks/__tests__/useVoiceInputPreferences.test.ts
@@ -0,0 +1,106 @@
+import { act, renderHook, waitFor } from "@testing-library/react";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+const mockGetClient = vi.fn();
+
+vi.mock("@/shared/api/acpConnection", () => ({
+  getClient: () => mockGetClient(),
+}));
+
+import { useVoiceInputPreferences } from "../useVoiceInputPreferences";
+
+function deferred<T>() {
+  let resolve!: (value: T | PromiseLike<T>) => void;
+  const promise = new Promise<T>((res) => {
+    resolve = res;
+  });
+  return { promise, resolve };
+}
+
+describe("useVoiceInputPreferences", () => {
+  beforeEach(() => {
+    mockGetClient.mockReset();
+  });
+
+  it("does not hydrate until provider config can be read successfully", async () => {
+    let shouldFailProviderRead = true;
+
+    mockGetClient.mockResolvedValue({
+      goose: {
+        GooseConfigRead: vi.fn().mockImplementation(({ key }) => {
+          if (key === "VOICE_DICTATION_PROVIDER") {
+            if (shouldFailProviderRead) {
+              return Promise.reject(new Error("temporary acp failure"));
+            }
+            return Promise.resolve({ value: "groq" });
+          }
+          return Promise.resolve({ value: null });
+        }),
+        GooseConfigUpsert: vi.fn().mockResolvedValue({}),
+        GooseConfigRemove: vi.fn().mockResolvedValue({}),
+      },
+    });
+
+    const { result } = renderHook(() => useVoiceInputPreferences());
+
+    await act(async () => {});
+
+    expect(result.current.isHydrated).toBe(false);
+    expect(result.current.selectedProvider).toBeNull();
+
+    shouldFailProviderRead = false;
+
+    await act(async () => {
+      window.dispatchEvent(new Event("goose:voice-input-preferences"));
+    });
+
+    await waitFor(() => expect(result.current.isHydrated).toBe(true));
+    expect(result.current.selectedProvider).toBe("groq");
+    expect(result.current.hasStoredProviderPreference).toBe(true);
+  });
+
+  it("broadcasts preference changes only after config persistence settles", async () => {
+    const upsert = vi.fn();
+    const providerRead = deferred<{ value?: unknown }>();
+    const pendingWrite = deferred<void>();
+
+    mockGetClient.mockResolvedValue({
+      goose: {
+        GooseConfigRead: vi
+          .fn()
+          .mockResolvedValueOnce({ value: null })
+          .mockResolvedValueOnce({ value: null })
+          .mockResolvedValueOnce({ value: null })
+          .mockImplementation(() => providerRead.promise),
+        GooseConfigUpsert: upsert.mockImplementation(
+          () => pendingWrite.promise,
+        ),
+        GooseConfigRemove: vi.fn().mockResolvedValue({}),
+      },
+    });
+
+    const eventListener = vi.fn();
+    window.addEventListener("goose:voice-input-preferences", eventListener);
+
+    const { result } = renderHook(() => useVoiceInputPreferences());
+
+    await waitFor(() => expect(result.current.isHydrated).toBe(true));
+
+    act(() => {
+      result.current.setSelectedProvider("openai");
+    });
+
+    expect(eventListener).not.toHaveBeenCalled();
+    expect(result.current.selectedProvider).toBe("openai");
+
+    await act(async () => {
+      pendingWrite.resolve();
+      await pendingWrite.promise;
+    });
+
+    await waitFor(() => expect(eventListener).toHaveBeenCalledTimes(1));
+
+    providerRead.resolve({ value: "openai" });
+    window.removeEventListener("goose:voice-input-preferences", eventListener);
+  });
+});
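All three suites lean on the same `deferred<T>` helper to freeze an async dependency mid-flight: the test hands the pending promise to the code under test, asserts on the intermediate state, then resolves it on its own schedule. The pattern in isolation, with standalone names rather than code from this change:

function gate<T>() {
  let open!: (value: T) => void;
  const promise = new Promise<T>((res) => {
    open = res;
  });
  return { promise, open };
}

// const g = gate<MediaStream>();
// mock.mockReturnValue(g.promise);  // code under test now blocks here
// ...assert the "still starting" state...
// g.open(fakeStream);               // release it and assert the final state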
""); + reader.onerror = () => reject(reader.error); + reader.readAsDataURL(blob); + }); +} + +function toErrorMessage(error: unknown) { + if (error instanceof Error && error.message) { + return error.message; + } + + return "Voice input failed"; +} + +export function useDictationRecorder({ + provider, + providerConfigured, + preferredMicrophoneId, + onError, + onTranscription, +}: UseDictationRecorderOptions) { + const [isRecording, setIsRecording] = useState(false); + const [isTranscribing, setIsTranscribing] = useState(false); + const audioContextRef = useRef(null); + const processorRef = useRef(null); + const sourceRef = useRef(null); + const streamRef = useRef(null); + const samplesRef = useRef([]); + const vadStateRef = useRef(createInitialVadState()); + const pendingTranscriptionsRef = useRef(0); + const generationRef = useRef(0); + // Per-generation sequence numbers so out-of-order transcription responses + // can be reassembled into the order the chunks were captured. Without this, + // a later chunk whose API call resolves faster can be appended before an + // earlier, slower one — scrambling long dictation sessions with variable + // API latency. Empty transcriptions still occupy a slot so they don't block + // subsequent chunks. + const chunkSeqRef = useRef(0); + const nextExpectedSeqRef = useRef(0); + const pendingResultsRef = useRef>(new Map()); + // Guards against overlapping startRecording calls while getUserMedia is + // pending (user double-clicks the mic before the first startup resolves). + const startingRef = useRef(false); + // Signals to an in-flight startRecording that the user has asked to stop. + // When true, the startup path tears down any just-acquired stream instead + // of flipping isRecording to true — otherwise the OS mic indicator would + // stay on after the user tried to stop/send. + const cancelStartRef = useRef(false); + const providerRef = useRef(provider); + providerRef.current = provider; + const onErrorRef = useRef(onError); + onErrorRef.current = onError; + const onTranscriptionRef = useRef(onTranscription); + onTranscriptionRef.current = onTranscription; + + const isEnabled = Boolean(provider && providerConfigured); + + const cleanupAudioGraph = useCallback(() => { + processorRef.current?.disconnect(); + processorRef.current = null; + sourceRef.current?.disconnect(); + sourceRef.current = null; + void audioContextRef.current?.close(); + audioContextRef.current = null; + streamRef.current?.getTracks().forEach((track) => { + track.stop(); + }); + streamRef.current = null; + }, []); + + const transcribeChunk = useCallback(async (samples: Float32Array) => { + const activeProvider = providerRef.current; + if (!activeProvider) { + return; + } + + const gen = generationRef.current; + const mySeq = chunkSeqRef.current; + chunkSeqRef.current += 1; + pendingTranscriptionsRef.current += 1; + setIsTranscribing(true); + + try { + const wavBlob = new Blob([encodeWav(samples, SAMPLE_RATE)], { + type: "audio/wav", + }); + const audio = await blobToBase64(wavBlob); + const response = await transcribeDictation({ + audio, + mimeType: "audio/wav", + provider: activeProvider, + }); + + if (gen !== generationRef.current) { + return; + } + + // Buffer by sequence number, then drain any contiguous prefix so + // emissions to onTranscription stay in capture order even when API + // responses resolve out of order. 
+      pendingResultsRef.current.set(mySeq, response.text);
+      while (pendingResultsRef.current.has(nextExpectedSeqRef.current)) {
+        const text = pendingResultsRef.current.get(nextExpectedSeqRef.current);
+        pendingResultsRef.current.delete(nextExpectedSeqRef.current);
+        nextExpectedSeqRef.current += 1;
+        if (text?.trim()) {
+          onTranscriptionRef.current(text);
+        }
+      }
+    } catch (error) {
+      onErrorRef.current(toErrorMessage(error));
+      // Unblock the queue so a failure doesn't stall every subsequent chunk.
+      if (gen === generationRef.current) {
+        pendingResultsRef.current.set(mySeq, "");
+        while (pendingResultsRef.current.has(nextExpectedSeqRef.current)) {
+          const text = pendingResultsRef.current.get(
+            nextExpectedSeqRef.current,
+          );
+          pendingResultsRef.current.delete(nextExpectedSeqRef.current);
+          nextExpectedSeqRef.current += 1;
+          if (text?.trim()) {
+            onTranscriptionRef.current(text);
+          }
+        }
+      }
+    } finally {
+      pendingTranscriptionsRef.current -= 1;
+      if (pendingTranscriptionsRef.current === 0) {
+        setIsTranscribing(false);
+      }
+    }
+  }, []);
+
+  const flushPendingSamples = useCallback(() => {
+    const chunks = samplesRef.current;
+    if (chunks.length === 0) {
+      return;
+    }
+
+    const totalSamples = chunks.reduce(
+      (count, chunk) => count + chunk.length,
+      0,
+    );
+    const merged = new Float32Array(totalSamples);
+    let offset = 0;
+    for (const chunk of chunks) {
+      merged.set(chunk, offset);
+      offset += chunk.length;
+    }
+
+    samplesRef.current = [];
+    void transcribeChunk(merged);
+  }, [transcribeChunk]);
+
+  const stopRecording = useCallback(
+    (options?: { flushPending?: boolean }) => {
+      const flushPending = options?.flushPending ?? true;
+
+      // Signal any in-flight startRecording to abort. If getUserMedia is
+      // still pending or the audio graph hasn't been wired up yet, the
+      // startup path will see this flag and clean up the just-acquired
+      // stream instead of flipping isRecording to true.
+      cancelStartRef.current = true;
+
+      if (flushPending && samplesRef.current.length > 0) {
+        flushPendingSamples();
+      } else if (!flushPending) {
+        samplesRef.current = [];
+        generationRef.current += 1;
+        // Reset chunk-ordering state so the new generation starts at seq 0.
+        // In-flight chunks from the old generation bail at the gen check in
+        // transcribeChunk without touching the pending map.
+        chunkSeqRef.current = 0;
+        nextExpectedSeqRef.current = 0;
+        pendingResultsRef.current.clear();
+      }
+
+      vadStateRef.current = createInitialVadState();
+      cleanupAudioGraph();
+      setIsRecording(false);
+    },
+    [cleanupAudioGraph, flushPendingSamples],
+  );
+
+  const handleFrame = useCallback(
+    (samples: Float32Array) => {
+      const { decision, nextState } = advanceVadState(
+        vadStateRef.current,
+        getFrameRms(samples),
+      );
+      vadStateRef.current = nextState;
+
+      if (decision === "ignore") {
+        return;
+      }
+
+      if (decision === "discard") {
+        samplesRef.current = [];
+        return;
+      }
+
+      samplesRef.current.push(new Float32Array(samples));
+
+      if (decision === "append_and_flush") {
+        flushPendingSamples();
+      }
+    },
+    [flushPendingSamples],
+  );
+
+  const startRecording = useCallback(async () => {
+    if (!isEnabled || !provider) {
+      onError("Voice input is not configured");
+      return;
+    }
+
+    // Bail if a startup is already in-flight or we're already recording.
+    // Without this guard, a rapid second click (before getUserMedia resolves)
+    // would kick off a parallel recorder setup and leak a MediaStream — the
+    // OS mic indicator would stay on after the user thought they'd stopped.
+    if (startingRef.current || isRecording) {
+      return;
+    }
+
+    startingRef.current = true;
+    cancelStartRef.current = false;
+
+    try {
+      const audioConstraints: MediaTrackConstraints = {
+        autoGainControl: true,
+        echoCancellation: true,
+        noiseSuppression: true,
+      };
+
+      if (preferredMicrophoneId) {
+        audioConstraints.deviceId = { exact: preferredMicrophoneId };
+      }
+
+      let stream: MediaStream;
+      try {
+        stream = await navigator.mediaDevices.getUserMedia({
+          audio: audioConstraints,
+        });
+      } catch (error) {
+        if (
+          preferredMicrophoneId &&
+          error instanceof DOMException &&
+          (error.name === "NotFoundError" ||
+            error.name === "OverconstrainedError")
+        ) {
+          delete audioConstraints.deviceId;
+          stream = await navigator.mediaDevices.getUserMedia({
+            audio: audioConstraints,
+          });
+        } else {
+          throw error;
+        }
+      }
+
+      // If stopRecording was called while getUserMedia was pending (e.g.,
+      // user clicked Send before the mic finished setting up), tear down
+      // the freshly-acquired stream immediately and bail. Otherwise the
+      // MediaStream tracks stay hot and the OS mic indicator lingers.
+      if (cancelStartRef.current) {
+        stream.getTracks().forEach((track) => {
+          track.stop();
+        });
+        return;
+      }
+
+      streamRef.current = stream;
+      samplesRef.current = [];
+      vadStateRef.current = createInitialVadState();
+
+      const context = new AudioContext({ sampleRate: SAMPLE_RATE });
+      audioContextRef.current = context;
+      await context.resume();
+
+      // Check again after the async context.resume() — stopRecording may
+      // have fired while we were awaiting.
+      if (cancelStartRef.current) {
+        cleanupAudioGraph();
+        return;
+      }
+
+      const source = context.createMediaStreamSource(stream);
+      const processor = context.createScriptProcessor(1024, 1, 1);
+      const silence = context.createGain();
+      silence.gain.value = 0;
+
+      processor.onaudioprocess = (event) => {
+        const channel = event.inputBuffer.getChannelData(0);
+        handleFrame(new Float32Array(channel));
+      };
+
+      source.connect(processor);
+      processor.connect(silence);
+      silence.connect(context.destination);
+
+      sourceRef.current = source;
+      processorRef.current = processor;
+      setIsRecording(true);
+    } catch (error) {
+      stopRecording({ flushPending: false });
+      onError(toErrorMessage(error));
+    } finally {
+      startingRef.current = false;
+    }
+  }, [
+    cleanupAudioGraph,
+    handleFrame,
+    isEnabled,
+    isRecording,
+    onError,
+    preferredMicrophoneId,
+    provider,
+    stopRecording,
+  ]);
+
+  const toggleRecording = useCallback(() => {
+    if (startingRef.current) {
+      stopRecording({ flushPending: false });
+      return;
+    }
+    if (isRecording) {
+      stopRecording();
+    } else {
+      void startRecording();
+    }
+  }, [isRecording, startRecording, stopRecording]);
+
+  useEffect(
+    () => () => {
+      stopRecording({ flushPending: false });
+    },
+    [stopRecording],
+  );
+
+  useEffect(() => {
+    if (!provider && isRecording) {
+      stopRecording({ flushPending: false });
+    }
+  }, [isRecording, provider, stopRecording]);
+
+  // Imperative check for consumers (e.g. handleSend) who need to know at
+  // click time whether a startup is pending. Uses a function rather than a
+  // state value because startingRef is a ref (no render on change) and we
+  // only need the answer when the consumer is deciding what to do *now*.
+  const isStarting = useCallback(() => startingRef.current, []);
+
+  return {
+    isEnabled,
+    isRecording,
+    isTranscribing,
+    isStarting,
+    startRecording,
+    stopRecording,
+    toggleRecording,
+  };
+}
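The ticket-and-drain scheme in `transcribeChunk` is the core of the hook's ordering guarantee and is easier to see stripped of React. A standalone sketch (names are illustrative; the hook additionally gates on a generation counter, omitted here):

const parked = new Map<number, string>();
let nextToEmit = 0;

// Called whenever a chunk's transcription settles, in any order. Results
// park in the map; only a contiguous prefix is released, and empty text
// still consumes its slot so a silent chunk can't stall the queue.
function onChunkDone(seq: number, text: string, emit: (t: string) => void) {
  parked.set(seq, text);
  while (parked.has(nextToEmit)) {
    const t = parked.get(nextToEmit)!;
    parked.delete(nextToEmit);
    nextToEmit += 1;
    if (t.trim()) emit(t);
  }
}

// onChunkDone(1, "world", console.log); // parked: seq 0 not done yet
// onChunkDone(0, "hello", console.log); // emits "hello", then "world"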
diff --git a/ui/goose2/src/features/chat/hooks/useVoiceDictation.ts b/ui/goose2/src/features/chat/hooks/useVoiceDictation.ts
new file mode 100644
index 000000000000..cfd2d67e81a4
--- /dev/null
+++ b/ui/goose2/src/features/chat/hooks/useVoiceDictation.ts
@@ -0,0 +1,181 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import { getDictationConfig } from "@/shared/api/dictation";
+import type { DictationProviderStatus } from "@/shared/types/dictation";
+import type { ChatAttachmentDraft } from "@/shared/types/messages";
+import { useDictationRecorder } from "./useDictationRecorder";
+import { useVoiceInputPreferences } from "./useVoiceInputPreferences";
+import {
+  appendTranscribedText,
+  getAutoSubmitMatch,
+  getDefaultDictationProvider,
+  VOICE_DICTATION_CONFIG_EVENT,
+} from "../lib/voiceInput";
+
+interface UseVoiceDictationOptions {
+  text: string;
+  setText: (value: string) => void;
+  attachments: ChatAttachmentDraft[];
+  clearAttachments: () => void;
+  selectedPersonaId: string | null;
+  onSend: (
+    text: string,
+    personaId?: string,
+    attachments?: ChatAttachmentDraft[],
+  ) => void;
+  resetTextarea: () => void;
+  /**
+   * When true, auto-submit on trigger phrase will NOT call `onSend`.
+   * Instead, the trigger phrase is stripped and the remaining transcription
+   * is left in the textarea for the user to review and send manually.
+   * Caller should set this to match `ChatInput`'s own send guards
+   * (queued-message lockout, outer `disabled` state, etc.) so voice
+   * auto-submit can't bypass the UI's protection against extra sends
+   * during an active run.
+   */
+  isSendLocked?: boolean;
+}
+
+export function useVoiceDictation({
+  text,
+  setText,
+  attachments,
+  clearAttachments,
+  selectedPersonaId,
+  onSend,
+  resetTextarea,
+  isSendLocked = false,
+}: UseVoiceDictationOptions) {
+  const voicePrefs = useVoiceInputPreferences();
+  const [providerStatuses, setProviderStatuses] = useState<
+    Partial<Record<string, DictationProviderStatus>>
+  >({});
+
+  const fetchDictationConfig = useCallback(() => {
+    getDictationConfig()
+      .then(setProviderStatuses)
+      .catch(() => {});
+  }, []);
+
+  useEffect(() => {
+    fetchDictationConfig();
+    window.addEventListener(VOICE_DICTATION_CONFIG_EVENT, fetchDictationConfig);
+    return () =>
+      window.removeEventListener(
+        VOICE_DICTATION_CONFIG_EVENT,
+        fetchDictationConfig,
+      );
+  }, [fetchDictationConfig]);
+
+  // Treat the stored preference as valid only when it actually appears in
+  // `providerStatuses`. If the stored value points at a provider that's been
+  // feature-flagged off or removed, fall through to the default so voice
+  // input isn't silently disabled. The explicit "off" state
+  // (`hasStoredProviderPreference && selectedProvider == null`) is preserved.
+  const storedProviderIsPresent =
+    voicePrefs.selectedProvider != null &&
+    providerStatuses[voicePrefs.selectedProvider] !== undefined;
+
+  const activeVoiceProvider = !voicePrefs.isHydrated
+    ? null
+    : storedProviderIsPresent
+      ? voicePrefs.selectedProvider
+      : voicePrefs.hasStoredProviderPreference &&
+          voicePrefs.selectedProvider == null
null + : getDefaultDictationProvider(providerStatuses); + + // If a stored preference points at a provider that's no longer in + // providerStatuses (feature-flagged off, removed), clear it so next boot + // falls through to the default cleanly instead of re-detecting the stale + // value every session. + useEffect(() => { + if ( + voicePrefs.selectedProvider != null && + Object.keys(providerStatuses).length > 0 && + providerStatuses[voicePrefs.selectedProvider] === undefined + ) { + voicePrefs.clearSelectedProvider(); + } + }, [providerStatuses, voicePrefs]); + + const providerConfigured = + activeVoiceProvider != null && + providerStatuses[activeVoiceProvider]?.configured === true; + + const stopRecordingRef = useRef< + (options?: { flushPending?: boolean }) => void + >(() => {}); + + // Mirror `text` in a ref so `handleTranscription` always sees the latest + // value, even when `useDictationRecorder` fires multiple callbacks in the + // same tick before React has applied the first setText. Without this, two + // concurrent callbacks would both read a stale `text` from closure and the + // second would overwrite the first fragment, dropping dictated words. + // + // Assign during render (not in a post-render `useEffect`) so there is no + // commit-window race: if the user types a character in the textarea and a + // transcription callback resolves before the effect runs, the callback + // would otherwise read the previous `text` and clobber the user's edit. + // Writing to `ref.current` during render is explicitly supported by React + // (see `providerRef.current = provider;` in `useDictationRecorder.ts`). + const textRef = useRef(text); + textRef.current = text; + + const handleTranscription = useCallback( + (fragment: string) => { + const latest = textRef.current; + const match = getAutoSubmitMatch(fragment, voicePrefs.autoSubmitPhrases); + if (match) { + const merged = appendTranscribedText(latest, match.textWithoutPhrase); + if (!merged.trim()) { + return; + } + stopRecordingRef.current({ flushPending: false }); + if (isSendLocked) { + // Parent UI is blocking sends (queued message, disabled, etc.). + // Strip the trigger phrase and leave the transcription in the + // textarea so the user can send it manually when the lock clears. + setText(merged); + textRef.current = merged; + return; + } + onSend( + merged.trim(), + selectedPersonaId ?? undefined, + attachments.length > 0 ? 
attachments : undefined, + ); + setText(""); + textRef.current = ""; + clearAttachments(); + resetTextarea(); + } else { + const merged = appendTranscribedText(latest, fragment); + setText(merged); + textRef.current = merged; + } + }, + [ + attachments, + clearAttachments, + isSendLocked, + onSend, + resetTextarea, + selectedPersonaId, + setText, + voicePrefs.autoSubmitPhrases, + ], + ); + + const handleVoiceError = useCallback((_message: string) => {}, []); + + const dictation = useDictationRecorder({ + provider: activeVoiceProvider, + providerConfigured, + preferredMicrophoneId: voicePrefs.preferredMicrophoneId, + onError: handleVoiceError, + onTranscription: handleTranscription, + }); + stopRecordingRef.current = dictation.stopRecording; + + return dictation; +} diff --git a/ui/goose2/src/features/chat/hooks/useVoiceInputPreferences.ts b/ui/goose2/src/features/chat/hooks/useVoiceInputPreferences.ts new file mode 100644 index 000000000000..e2fc66f2472c --- /dev/null +++ b/ui/goose2/src/features/chat/hooks/useVoiceInputPreferences.ts @@ -0,0 +1,211 @@ +import { useCallback, useEffect, useMemo, useState } from "react"; +import { getClient } from "@/shared/api/acpConnection"; +import { + DEFAULT_AUTO_SUBMIT_PHRASES_RAW, + DISABLED_DICTATION_PROVIDER_CONFIG_VALUE, + VOICE_AUTO_SUBMIT_PHRASES_CONFIG_KEY, + VOICE_DICTATION_PREFERRED_MIC_CONFIG_KEY, + VOICE_DICTATION_PROVIDER_CONFIG_KEY, + normalizeDictationProvider, + parseAutoSubmitPhrases, +} from "../lib/voiceInput"; +import type { DictationProvider } from "@/shared/types/dictation"; + +const VOICE_INPUT_PREFERENCES_EVENT = "goose:voice-input-preferences"; + +type ConfigReadResult = { ok: true; value: string | null } | { ok: false }; + +async function readConfigString(key: string): Promise { + try { + const client = await getClient(); + const response = await client.goose.GooseConfigRead({ key }); + return { + ok: true, + value: typeof response.value === "string" ? response.value : null, + }; + } catch { + return { ok: false }; + } +} + +async function writeConfigString(key: string, value: string): Promise { + try { + const client = await getClient(); + await client.goose.GooseConfigUpsert({ key, value }); + } catch { + // goose config may be unavailable + } +} + +async function removeConfigKey(key: string): Promise { + try { + const client = await getClient(); + await client.goose.GooseConfigRemove({ key }); + } catch { + // goose config may be unavailable + } +} + +export function useVoiceInputPreferences() { + const [rawAutoSubmitPhrases, setRawAutoSubmitPhrasesState] = useState( + DEFAULT_AUTO_SUBMIT_PHRASES_RAW, + ); + const [selectedProvider, setSelectedProviderState] = + useState(null); + const [hasStoredProviderPreference, setHasStoredProviderPreferenceState] = + useState(false); + const [preferredMicrophoneId, setPreferredMicrophoneIdState] = useState< + string | null + >(null); + // Flips true after the first syncFromConfig completes so consumers can + // distinguish "no stored preference" from "the ACP round-trip hasn't + // finished yet." Without this, a consumer that auto-writes a default when + // hasStoredProviderPreference is false can race ahead and overwrite the + // user's saved choice before it loads. 
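// Illustrative sketch of the race the flag below guards against (the
// consumer shape is assumed): without a hydration gate, the first render
// sees hasStoredProviderPreference === false (the pre-hydration default)
// and writes a provider, clobbering the user's saved choice before the
// async config read lands.
function maybeApplyDefaultProvider(prefs: {
  isHydrated: boolean;
  hasStoredProviderPreference: boolean;
  setSelectedProvider: (p: "openai" | "groq" | "elevenlabs" | "local") => void;
}) {
  if (!prefs.isHydrated) {
    return; // the config round-trip is still in flight; deciding now races it
  }
  if (!prefs.hasStoredProviderPreference) {
    prefs.setSelectedProvider("openai"); // hypothetical default
  }
}
// The `isHydrated` flag declared next is that gate.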
+ const [isHydrated, setIsHydrated] = useState(false); + + const syncFromConfig = useCallback(async () => { + const [phrasesResult, providerResult, micResult] = await Promise.all([ + readConfigString(VOICE_AUTO_SUBMIT_PHRASES_CONFIG_KEY), + readConfigString(VOICE_DICTATION_PROVIDER_CONFIG_KEY), + readConfigString(VOICE_DICTATION_PREFERRED_MIC_CONFIG_KEY), + ]); + + if (phrasesResult.ok) { + setRawAutoSubmitPhrasesState( + phrasesResult.value ?? DEFAULT_AUTO_SUBMIT_PHRASES_RAW, + ); + } + + if (!providerResult.ok) { + if (micResult.ok) { + setPreferredMicrophoneIdState(micResult.value); + } + return; + } + + if (providerResult.value === DISABLED_DICTATION_PROVIDER_CONFIG_VALUE) { + setSelectedProviderState(null); + setHasStoredProviderPreferenceState(true); + } else if (providerResult.value != null) { + const normalized = normalizeDictationProvider(providerResult.value); + if (normalized !== null) { + setSelectedProviderState(normalized); + setHasStoredProviderPreferenceState(true); + } else { + // Stored value isn't a recognized provider (stale from an older + // build, typo, etc.). Treat as no preference — don't pin the user + // to voice-off — and clear the config key so future boots fall + // through to the default cleanly. + setSelectedProviderState(null); + setHasStoredProviderPreferenceState(false); + void removeConfigKey(VOICE_DICTATION_PROVIDER_CONFIG_KEY); + } + } else { + setSelectedProviderState(null); + setHasStoredProviderPreferenceState(false); + } + + if (micResult.ok) { + setPreferredMicrophoneIdState(micResult.value); + } + setIsHydrated(true); + }, []); + + useEffect(() => { + void syncFromConfig(); + const handler = () => { + void syncFromConfig(); + }; + window.addEventListener( + VOICE_INPUT_PREFERENCES_EVENT, + handler as EventListener, + ); + return () => { + window.removeEventListener( + VOICE_INPUT_PREFERENCES_EVENT, + handler as EventListener, + ); + }; + }, [syncFromConfig]); + + const dispatchPreferencesEvent = useCallback(() => { + window.dispatchEvent(new Event(VOICE_INPUT_PREFERENCES_EVENT)); + }, []); + + const persistAndBroadcast = useCallback( + (operation: Promise) => { + void operation.finally(() => { + dispatchPreferencesEvent(); + }); + }, + [dispatchPreferencesEvent], + ); + + const setRawAutoSubmitPhrases = useCallback( + (value: string) => { + setRawAutoSubmitPhrasesState(value); + persistAndBroadcast( + writeConfigString(VOICE_AUTO_SUBMIT_PHRASES_CONFIG_KEY, value), + ); + }, + [persistAndBroadcast], + ); + + const setSelectedProvider = useCallback( + (value: DictationProvider | null) => { + setSelectedProviderState(value); + setHasStoredProviderPreferenceState(true); + persistAndBroadcast( + writeConfigString( + VOICE_DICTATION_PROVIDER_CONFIG_KEY, + value ?? DISABLED_DICTATION_PROVIDER_CONFIG_VALUE, + ), + ); + }, + [persistAndBroadcast], + ); + + // Remove the stored preference entirely, so the user falls through to the + // default provider on next boot. Distinct from setSelectedProvider(null), + // which pins the user to "voice off" via a sentinel value. 
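// Side-by-side view of the two "no provider" states (values are the
// constants from voiceInput.ts in this patch):
//
//   setSelectedProvider(null)   writes "__disabled__": explicit voice-off,
//                               survives restarts and provider changes
//   clearSelectedProvider()     removes the config key: the next boot falls
//                               through to getDefaultDictationProvider
//
// clearSelectedProvider, defined next, implements the removal path.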
+ const clearSelectedProvider = useCallback(() => { + setSelectedProviderState(null); + setHasStoredProviderPreferenceState(false); + persistAndBroadcast(removeConfigKey(VOICE_DICTATION_PROVIDER_CONFIG_KEY)); + }, [persistAndBroadcast]); + + const setPreferredMicrophoneId = useCallback( + (value: string | null) => { + setPreferredMicrophoneIdState(value); + if (value) { + persistAndBroadcast( + writeConfigString(VOICE_DICTATION_PREFERRED_MIC_CONFIG_KEY, value), + ); + } else { + persistAndBroadcast( + removeConfigKey(VOICE_DICTATION_PREFERRED_MIC_CONFIG_KEY), + ); + } + }, + [persistAndBroadcast], + ); + + const autoSubmitPhrases = useMemo( + () => parseAutoSubmitPhrases(rawAutoSubmitPhrases), + [rawAutoSubmitPhrases], + ); + + return { + autoSubmitPhrases, + clearSelectedProvider, + hasStoredProviderPreference, + isHydrated, + preferredMicrophoneId, + rawAutoSubmitPhrases, + selectedProvider, + setPreferredMicrophoneId, + setRawAutoSubmitPhrases, + setSelectedProvider, + }; +} diff --git a/ui/goose2/src/features/chat/lib/dictationVad.test.ts b/ui/goose2/src/features/chat/lib/dictationVad.test.ts new file mode 100644 index 000000000000..89e96045c507 --- /dev/null +++ b/ui/goose2/src/features/chat/lib/dictationVad.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from "vitest"; +import { advanceVadState, createInitialVadState } from "./dictationVad"; + +function runFrames(levels: number[]) { + const decisions: string[] = []; + let state = createInitialVadState(); + + for (const level of levels) { + const result = advanceVadState(state, level); + decisions.push(result.decision); + state = result.nextState; + } + + return decisions; +} + +describe("dictationVad", () => { + it("ignores silence-only audio", () => { + expect(runFrames([0, 0, 0, 0])).toEqual([ + "ignore", + "ignore", + "ignore", + "ignore", + ]); + }); + + it("discards short noise bursts that never confirm speech", () => { + expect(runFrames([0.03, 0, 0, 0])).toEqual([ + "append", + "append", + "append", + "discard", + ]); + }); + + it("flushes a chunk after speech followed by trailing silence", () => { + expect(runFrames([0.03, 0.03, 0.03, 0, 0, 0, 0, 0, 0])).toContain( + "append_and_flush", + ); + }); + + it("returns to ignoring silence after a flush, ready for another chunk", () => { + const decisions = runFrames([ + 0.03, 0.03, 0.03, 0, 0, 0, 0, 0, 0, 0.03, 0.03, 0.03, 0, 0, 0, 0, 0, 0, + ]); + + expect( + decisions.filter((decision) => decision === "append_and_flush"), + ).toHaveLength(2); + }); +}); diff --git a/ui/goose2/src/features/chat/lib/dictationVad.ts b/ui/goose2/src/features/chat/lib/dictationVad.ts new file mode 100644 index 000000000000..0b4561e8cbae --- /dev/null +++ b/ui/goose2/src/features/chat/lib/dictationVad.ts @@ -0,0 +1,147 @@ +export type VadPhase = "idle" | "primed" | "speaking" | "trailing"; + +export type VadDecision = "ignore" | "append" | "append_and_flush" | "discard"; + +export interface VadState { + phase: VadPhase; + speechFrames: number; + silenceFrames: number; + framesInChunk: number; +} + +const SPEECH_RMS_THRESHOLD = 0.018; +const SPEECH_CONFIRMATION_FRAMES = 2; +const MAX_PRIMED_SILENCE_FRAMES = 2; +const TRAILING_SILENCE_FRAMES = 6; +const MIN_SPEECH_FRAMES = 3; + +export function createInitialVadState(): VadState { + return { + phase: "idle", + speechFrames: 0, + silenceFrames: 0, + framesInChunk: 0, + }; +} + +export function getFrameRms(samples: Float32Array): number { + let sum = 0; + for (let index = 0; index < samples.length; index += 1) { + const value = samples[index] ?? 
0; + sum += value * value; + } + + return Math.sqrt(sum / Math.max(samples.length, 1)); +} + +export function advanceVadState( + state: VadState, + frameRms: number, +): { decision: VadDecision; nextState: VadState } { + const isSpeech = frameRms >= SPEECH_RMS_THRESHOLD; + + if (state.phase === "idle") { + if (!isSpeech) { + return { decision: "ignore" as const, nextState: state }; + } + + return { + decision: "append" as const, + nextState: { + phase: "primed" as const, + speechFrames: 1, + silenceFrames: 0, + framesInChunk: 1, + }, + }; + } + + if (state.phase === "primed") { + if (isSpeech) { + const speechFrames = state.speechFrames + 1; + return { + decision: "append" as const, + nextState: { + phase: + speechFrames >= SPEECH_CONFIRMATION_FRAMES ? "speaking" : "primed", + speechFrames, + silenceFrames: 0, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + const silenceFrames = state.silenceFrames + 1; + if (silenceFrames > MAX_PRIMED_SILENCE_FRAMES) { + return { + decision: "discard" as const, + nextState: createInitialVadState(), + }; + } + + return { + decision: "append" as const, + nextState: { + ...state, + silenceFrames, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + if (state.phase === "speaking") { + if (isSpeech) { + return { + decision: "append" as const, + nextState: { + phase: "speaking" as const, + speechFrames: state.speechFrames + 1, + silenceFrames: 0, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + return { + decision: "append" as const, + nextState: { + phase: "trailing" as const, + speechFrames: state.speechFrames, + silenceFrames: 1, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + if (isSpeech) { + return { + decision: "append" as const, + nextState: { + phase: "speaking" as const, + speechFrames: state.speechFrames + 1, + silenceFrames: 0, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + const silenceFrames = state.silenceFrames + 1; + if (silenceFrames < TRAILING_SILENCE_FRAMES) { + return { + decision: "append" as const, + nextState: { + ...state, + silenceFrames, + framesInChunk: state.framesInChunk + 1, + }, + }; + } + + return { + decision: + state.speechFrames >= MIN_SPEECH_FRAMES + ? 
("append_and_flush" as const) + : ("discard" as const), + nextState: createInitialVadState(), + }; +} diff --git a/ui/goose2/src/features/chat/lib/voiceInput.test.ts b/ui/goose2/src/features/chat/lib/voiceInput.test.ts new file mode 100644 index 000000000000..452e5bf2d189 --- /dev/null +++ b/ui/goose2/src/features/chat/lib/voiceInput.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from "vitest"; +import { + appendTranscribedText, + getDefaultDictationProvider, + getAutoSubmitMatch, + parseAutoSubmitPhrases, + replaceTrailingTranscribedText, +} from "./voiceInput"; + +describe("voiceInput helpers", () => { + it("parses comma-separated auto-submit phrases", () => { + expect(parseAutoSubmitPhrases(" submit, Ship It ,submit ,, ")).toEqual([ + "submit", + "ship it", + ]); + }); + + it("appends dictated text without smashing words together", () => { + expect(appendTranscribedText("hello", "world")).toBe("hello world"); + expect(appendTranscribedText("hello ", "world")).toBe("hello world"); + expect(appendTranscribedText("hello", ", world")).toBe("hello, world"); + }); + + it("replaces only the trailing dictated segment", () => { + expect( + replaceTrailingTranscribedText( + "draft dictated text", + "dictated text", + "dictated text submit", + ), + ).toBe("draft dictated text submit"); + }); + + it("matches auto-submit phrases only at the end of dictated text", () => { + expect(getAutoSubmitMatch("please submit now", ["submit"])).toBeNull(); + expect(getAutoSubmitMatch("please SUBMIT.", ["submit"])).toEqual({ + matchedPhrase: "submit", + textWithoutPhrase: "please", + }); + }); + + it("strips the full raw phrase span when internal whitespace is repeated", () => { + // The phrase "ship it" is matched against the *normalized* text, where + // "ship it" collapses to "ship it" (7 chars). But in the raw text the + // phrase occupies 9 chars — slicing by -phrase.length would leave a + // dangling "sh" on the end. The fix walks the raw text with a regex so + // the slice index reflects the actual phrase span in the raw string. 
+ expect(getAutoSubmitMatch("hello ship it", ["ship it"])).toEqual({ + matchedPhrase: "ship it", + textWithoutPhrase: "hello", + }); + }); + + it("picks the first configured dictation provider by priority", () => { + expect( + getDefaultDictationProvider({ + openai: { + configured: false, + description: "OpenAI", + usesProviderConfig: true, + availableModels: [], + }, + groq: { + configured: true, + description: "Groq", + usesProviderConfig: false, + availableModels: [], + }, + local: { + configured: true, + description: "Local", + usesProviderConfig: false, + availableModels: [], + }, + }), + ).toBe("groq"); + }); + + it("falls back to the first available provider when none are configured", () => { + expect( + getDefaultDictationProvider({ + elevenlabs: { + configured: false, + description: "ElevenLabs", + usesProviderConfig: false, + availableModels: [], + }, + local: { + configured: false, + description: "Local", + usesProviderConfig: false, + availableModels: [], + }, + }), + ).toBe("local"); + }); +}); diff --git a/ui/goose2/src/features/chat/lib/voiceInput.ts b/ui/goose2/src/features/chat/lib/voiceInput.ts new file mode 100644 index 000000000000..b349f89165ca --- /dev/null +++ b/ui/goose2/src/features/chat/lib/voiceInput.ts @@ -0,0 +1,199 @@ +import type { + DictationProvider, + DictationProviderStatus, +} from "@/shared/types/dictation"; + +// goose config keys — stored in the user's goose config.yaml via the +// _goose/config/{read,upsert,remove} ACP methods, not localStorage. +export const VOICE_AUTO_SUBMIT_PHRASES_CONFIG_KEY = "VOICE_AUTO_SUBMIT_PHRASES"; +export const VOICE_DICTATION_PROVIDER_CONFIG_KEY = "VOICE_DICTATION_PROVIDER"; +export const VOICE_DICTATION_PREFERRED_MIC_CONFIG_KEY = + "VOICE_DICTATION_PREFERRED_MIC"; +export const VOICE_DICTATION_CONFIG_EVENT = "goose:voice-dictation-config"; +export const DISABLED_DICTATION_PROVIDER_CONFIG_VALUE = "__disabled__"; + +export const DEFAULT_AUTO_SUBMIT_PHRASES_RAW = "submit"; + +const TRAILING_PUNCTUATION_REGEX = /[\s"'`.,!?;:)\]}]+$/u; + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function normalizePhrase(value: string): string { + return value + .toLowerCase() + .replace(/\s+/g, " ") + .trim() + .replace(TRAILING_PUNCTUATION_REGEX, "") + .trim(); +} + +export function parseAutoSubmitPhrases(rawValue: string | null | undefined) { + if (!rawValue) { + return []; + } + + return Array.from( + new Set( + rawValue + .split(",") + .map((value) => normalizePhrase(value)) + .filter(Boolean), + ), + ); +} + +export function normalizeDictationProvider( + value: string | null | undefined, +): DictationProvider | null { + if ( + value === "openai" || + value === "groq" || + value === "elevenlabs" || + value === "local" + ) { + return value; + } + + return null; +} + +export function getDefaultDictationProvider( + providerStatuses: Partial>, +): DictationProvider | null { + const configuredProviderPriority: DictationProvider[] = [ + "openai", + "groq", + "elevenlabs", + "local", + ]; + const fallbackProviderPriority: DictationProvider[] = [ + "local", + "openai", + "groq", + "elevenlabs", + ]; + + for (const provider of configuredProviderPriority) { + if (providerStatuses[provider]?.configured) { + return provider; + } + } + + for (const provider of fallbackProviderPriority) { + if (providerStatuses[provider]) { + return provider; + } + } + + return null; +} + +export function appendTranscribedText(baseText: string, fragment: string) { + const normalizedFragment = 
fragment.replace(/\s+/g, " ").trim(); + if (!normalizedFragment) { + return baseText; + } + + if (!baseText.trim()) { + return normalizedFragment; + } + + if (/[\s([{/-]$/.test(baseText) || /^[,.;!?)]/.test(normalizedFragment)) { + return `${baseText}${normalizedFragment}`; + } + + return `${baseText} ${normalizedFragment}`; +} + +export function replaceTrailingTranscribedText( + fullText: string, + previousTranscribedText: string, + nextTranscribedText: string, +) { + if (!previousTranscribedText) { + return appendTranscribedText(fullText, nextTranscribedText); + } + + if (fullText.endsWith(previousTranscribedText)) { + return appendTranscribedText( + fullText.slice(0, -previousTranscribedText.length), + nextTranscribedText, + ); + } + + const trimmedPreviousText = previousTranscribedText.trim(); + if (trimmedPreviousText && fullText.endsWith(trimmedPreviousText)) { + return appendTranscribedText( + fullText.slice(0, -trimmedPreviousText.length), + nextTranscribedText, + ); + } + + return appendTranscribedText(fullText, nextTranscribedText); +} + +export function getAutoSubmitMatch( + transcribedText: string, + autoSubmitPhrases: string[], +) { + const normalizedTranscribedText = normalizePhrase(transcribedText); + if (!normalizedTranscribedText) { + return null; + } + + const sortedPhrases = [...autoSubmitPhrases].sort( + (left, right) => right.length - left.length, + ); + + for (const phrase of sortedPhrases) { + if (!normalizedTranscribedText.endsWith(phrase)) { + continue; + } + + const phraseStartIndex = normalizedTranscribedText.length - phrase.length; + if ( + phraseStartIndex > 0 && + normalizedTranscribedText[phraseStartIndex - 1] !== " " + ) { + continue; + } + + // Map the phrase back to the *raw* transcribed text. `phrase.length` is + // the length in normalized form (whitespace collapsed to single spaces, + // lowercased, trailing punctuation stripped). Applying -phrase.length + // directly to trimmedText undercounts whenever the raw text has repeated + // whitespace or mixed case, chopping off legitimate content. Instead, + // match the phrase at the end of the raw text using a regex that allows + // flexible whitespace between words, so the slice index reflects the + // actual start of the phrase in the raw string. + const trimmedText = transcribedText.replace(TRAILING_PUNCTUATION_REGEX, ""); + const phraseWords = phrase.split(" ").filter(Boolean).map(escapeRegExp); + const phrasePattern = new RegExp( + `(^|\\s)(${phraseWords.join("\\s+")})\\s*$`, + "iu", + ); + const rawMatch = trimmedText.match(phrasePattern); + const phraseStartOffset = + rawMatch && rawMatch.index !== undefined + ? rawMatch.index + (rawMatch[1]?.length ?? 
0) + : trimmedText.length - phrase.length; + const textWithoutPhrase = trimmedText.slice(0, phraseStartOffset).trimEnd(); + + return { + matchedPhrase: phrase, + textWithoutPhrase, + }; + } + + return null; +} + +export function notifyVoiceDictationConfigChanged() { + try { + window.dispatchEvent(new Event(VOICE_DICTATION_CONFIG_EVENT)); + } catch { + // no-op + } +} diff --git a/ui/goose2/src/features/chat/ui/ChatInput.tsx b/ui/goose2/src/features/chat/ui/ChatInput.tsx index 9b40f2b768f3..4a21f9a60c1d 100644 --- a/ui/goose2/src/features/chat/ui/ChatInput.tsx +++ b/ui/goose2/src/features/chat/ui/ChatInput.tsx @@ -22,6 +22,7 @@ import { } from "../hooks/useChatInputAttachments"; import type { ModelOption } from "../types"; import { ChatInputAttachments } from "./ChatInputAttachments"; +import { useVoiceDictation } from "../hooks/useVoiceDictation"; export interface ProjectOption { id: string; @@ -121,6 +122,25 @@ export function ChatInput({ clearAttachments, } = useChatInputAttachments(); + const resetTextarea = useCallback(() => { + if (textareaRef.current) { + textareaRef.current.style.height = "auto"; + } + }, []); + + const hasQueuedMessage = queuedMessage !== null; + + const dictation = useVoiceDictation({ + text, + setText, + attachments, + clearAttachments, + selectedPersonaId, + onSend, + resetTextarea, + isSendLocked: hasQueuedMessage || disabled, + }); + const activePersona = useMemo( () => personas.find((persona) => persona.id === selectedPersonaId) ?? null, [personas, selectedPersonaId], @@ -133,7 +153,6 @@ export function ChatInput({ ); const stickyPersona = activePersona; - const hasQueuedMessage = queuedMessage !== null; const canSend = (text.trim().length > 0 || attachments.length > 0) && !hasQueuedMessage && @@ -182,6 +201,24 @@ export function ChatInput({ return; } + // If recording, stop without waiting for final flush and send what's + // already transcribed into the textarea. This makes Send a single click + // even while the mic is hot; any in-flight audio after the user clicked + // Send is intentionally dropped. + // + // Also handles the edge case where the user clicks Send while a + // getUserMedia startup is still pending (isRecording is still false but + // a stream is about to be acquired) — stopRecording sets the internal + // cancel flag so the pending startup tears itself down instead of + // leaving the OS mic indicator on. + if ( + dictation.isRecording || + dictation.isTranscribing || + dictation.isStarting() + ) { + dictation.stopRecording({ flushPending: false }); + } + onSend( text.trim(), selectedPersonaId ?? undefined, @@ -196,6 +233,7 @@ export function ChatInput({ attachments, canSend, clearAttachments, + dictation, onSend, selectedPersonaId, setText, @@ -408,7 +446,13 @@ export function ChatInput({ onChange={handleInput} onKeyDown={handleKeyDown} onPaste={handlePaste} - placeholder={effectivePlaceholder} + placeholder={ + dictation.isRecording + ? t("toolbar.voiceInputRecording") + : dictation.isTranscribing + ? 
t("toolbar.voiceInputTranscribing") + : effectivePlaceholder + } disabled={disabled} rows={1} className="mb-3 min-h-[36px] max-h-[200px] w-full resize-none bg-transparent px-1 text-[14px] leading-relaxed text-foreground placeholder:font-light placeholder:text-muted-foreground/60 focus:outline-none focus-visible:ring-0 focus-visible:ring-offset-0 disabled:opacity-60" @@ -447,6 +491,10 @@ export function ChatInput({ onSend={handleSend} onStop={onStop} isCompact={isCompact} + voiceEnabled={dictation.isEnabled} + voiceRecording={dictation.isRecording} + voiceTranscribing={dictation.isTranscribing} + onVoiceToggle={dictation.toggleRecording} /> diff --git a/ui/goose2/src/features/chat/ui/ChatInputToolbar.tsx b/ui/goose2/src/features/chat/ui/ChatInputToolbar.tsx index 3e25b8f084ce..411c002fa1fc 100644 --- a/ui/goose2/src/features/chat/ui/ChatInputToolbar.tsx +++ b/ui/goose2/src/features/chat/ui/ChatInputToolbar.tsx @@ -90,6 +90,11 @@ interface ChatInputToolbarProps { onAttachFiles?: () => void; onAttachFolders?: () => void; disabled?: boolean; + // Voice + voiceEnabled?: boolean; + voiceRecording?: boolean; + voiceTranscribing?: boolean; + onVoiceToggle?: () => void; // Layout isCompact: boolean; } @@ -124,6 +129,10 @@ export function ChatInputToolbar({ onAttachFiles, onAttachFolders, disabled = false, + voiceEnabled = false, + voiceRecording = false, + voiceTranscribing = false, + onVoiceToggle, isCompact, }: ChatInputToolbarProps) { const { t } = useTranslation("chat"); @@ -384,14 +393,32 @@ export function ChatInputToolbar({ type="button" variant="ghost" size="icon-sm" - disabled - aria-label={t("toolbar.voiceInputSoon")} + disabled={!voiceRecording && (!voiceEnabled || disabled)} + onClick={onVoiceToggle} + aria-label={ + voiceRecording + ? t("toolbar.voiceInputRecording") + : t("toolbar.voiceInput") + } + className={cn( + voiceRecording && + "bg-destructive/10 text-destructive hover:bg-destructive/20 hover:text-destructive", + voiceTranscribing && "animate-pulse", + )} > - {t("toolbar.voiceInputSoon")} + + {!voiceEnabled + ? t("toolbar.voiceInputDisabled") + : voiceRecording + ? t("toolbar.voiceInputRecording") + : voiceTranscribing + ? 
t("toolbar.voiceInputTranscribing") + : t("toolbar.voiceInput")} + diff --git a/ui/goose2/src/features/chat/ui/__tests__/ChatInput.test.tsx b/ui/goose2/src/features/chat/ui/__tests__/ChatInput.test.tsx index 0561892dd011..6b60e12d7264 100644 --- a/ui/goose2/src/features/chat/ui/__tests__/ChatInput.test.tsx +++ b/ui/goose2/src/features/chat/ui/__tests__/ChatInput.test.tsx @@ -3,8 +3,22 @@ import { fireEvent, render, screen } from "@testing-library/react"; import userEvent from "@testing-library/user-event"; import { useState } from "react"; import { ChatInput } from "../ChatInput"; +import { ChatInputToolbar } from "../ChatInputToolbar"; import type { Persona } from "@/shared/types/agents"; +const mockVoiceDictation = { + isEnabled: true, + isRecording: false, + isTranscribing: false, + isStarting: vi.fn(() => false), + stopRecording: vi.fn(), + toggleRecording: vi.fn(), +}; + +vi.mock("../hooks/useVoiceDictation", () => ({ + useVoiceDictation: () => mockVoiceDictation, +})); + vi.mock("@/features/providers/hooks/useAgentProviderStatus", () => ({ useAgentProviderStatus: () => ({ readyAgentIds: new Set(["goose", "claude-acp", "codex-acp"]), @@ -63,6 +77,13 @@ describe("ChatInput", () => { beforeEach(() => { mockListFilesForMentions.mockClear(); mockListFilesForMentions.mockResolvedValue([]); + mockVoiceDictation.isEnabled = true; + mockVoiceDictation.isRecording = false; + mockVoiceDictation.isTranscribing = false; + mockVoiceDictation.isStarting.mockReset(); + mockVoiceDictation.isStarting.mockReturnValue(false); + mockVoiceDictation.stopRecording.mockReset(); + mockVoiceDictation.toggleRecording.mockReset(); }); it("renders with default placeholder", () => { @@ -418,6 +439,53 @@ describe("ChatInput", () => { expect(onSend).not.toHaveBeenCalled(); }); + it("does not stop dictation when send is blocked", async () => { + const onSend = vi.fn(); + const user = userEvent.setup(); + mockVoiceDictation.isRecording = true; + + render( + , + ); + + await user.type(screen.getByRole("textbox"), "another message"); + await user.keyboard("{Enter}"); + + expect(onSend).not.toHaveBeenCalled(); + expect(mockVoiceDictation.stopRecording).not.toHaveBeenCalled(); + }); + + it("keeps the mic toggle enabled while recording even if voice input becomes unavailable", () => { + render( + , + ); + + expect(screen.getByRole("button", { name: "Listening..." 
})).toBeEnabled(); + }); + it("keeps the selected assistant chip after sending subsequent messages", async () => { const onSend = vi.fn(); const user = userEvent.setup(); diff --git a/ui/goose2/src/features/settings/ui/LocalWhisperModels.tsx b/ui/goose2/src/features/settings/ui/LocalWhisperModels.tsx new file mode 100644 index 000000000000..751ef8a628b7 --- /dev/null +++ b/ui/goose2/src/features/settings/ui/LocalWhisperModels.tsx @@ -0,0 +1,325 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { useTranslation } from "react-i18next"; +import { Button } from "@/shared/ui/button"; +import { + cancelDictationLocalModelDownload, + deleteDictationLocalModel, + downloadDictationLocalModel, + getDictationLocalModelDownloadProgress, + listDictationLocalModels, +} from "@/shared/api/dictation"; + +type LocalModel = { + id: string; + description: string; + sizeMb: number; + downloaded: boolean; + downloadInProgress: boolean; +}; + +type DownloadProgress = { + bytesDownloaded: number; + totalBytes: number; + progressPercent: number; + status: string; + error?: string | null; +}; + +const POLL_INTERVAL_MS = 750; + +interface LocalWhisperModelsProps { + selectedModelId: string; + onSelectModel: (modelId: string) => void | Promise; + onModelsChanged: () => void | Promise; +} + +export function LocalWhisperModels({ + selectedModelId, + onSelectModel, + onModelsChanged, +}: LocalWhisperModelsProps) { + const { t } = useTranslation(["settings", "common"]); + const [models, setModels] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [downloadingIds, setDownloadingIds] = useState>(new Set()); + const [progresses, setProgresses] = useState>( + new Map(), + ); + const onModelsChangedRef = useRef(onModelsChanged); + onModelsChangedRef.current = onModelsChanged; + + const refresh = useCallback(async () => { + try { + const list = + (await listDictationLocalModels()) as unknown as LocalModel[]; + setModels(list); + setDownloadingIds((prev) => { + const next = new Set(prev); + for (const m of list) { + if (m.downloadInProgress) next.add(m.id); + } + return next; + }); + } catch (err) { + setError( + err instanceof Error ? 
err.message : t("general.voiceInput.loadError"), + ); + } + }, [t]); + + useEffect(() => { + const load = async () => { + setLoading(true); + setError(null); + await refresh(); + setLoading(false); + }; + void load(); + }, [refresh]); + + useEffect(() => { + if (downloadingIds.size === 0) return; + let cancelled = false; + + const tick = async () => { + const next = new Map(); + const stillActive = new Set(); + const finishedIds: string[] = []; + + for (const id of downloadingIds) { + try { + const progress = (await getDictationLocalModelDownloadProgress( + id, + )) as unknown as DownloadProgress | null; + if (!progress) { + finishedIds.push(id); + continue; + } + next.set(id, progress); + if (progress.status === "downloading") { + stillActive.add(id); + } else { + finishedIds.push(id); + } + } catch { + stillActive.add(id); + } + } + if (cancelled) return; + setProgresses(next); + if (finishedIds.length > 0) { + await refresh(); + await onModelsChangedRef.current(); + } + setDownloadingIds(stillActive); + }; + + const interval = window.setInterval(() => { + void tick(); + }, POLL_INTERVAL_MS); + return () => { + cancelled = true; + window.clearInterval(interval); + }; + }, [downloadingIds, refresh]); + + const startDownload = useCallback( + async (modelId: string) => { + setError(null); + try { + await downloadDictationLocalModel(modelId); + setDownloadingIds((prev) => new Set(prev).add(modelId)); + } catch (err) { + setError( + err instanceof Error + ? err.message + : t("general.voiceInput.saveError"), + ); + } + }, + [t], + ); + + const cancelDownload = useCallback( + async (modelId: string) => { + setError(null); + try { + await cancelDictationLocalModelDownload(modelId); + } catch (err) { + setError( + err instanceof Error + ? err.message + : t("general.voiceInput.saveError"), + ); + } finally { + setProgresses((prev) => { + const next = new Map(prev); + next.delete(modelId); + return next; + }); + setDownloadingIds((prev) => { + const next = new Set(prev); + next.delete(modelId); + return next; + }); + await refresh(); + } + }, + [refresh, t], + ); + + const deleteModel = useCallback( + async (modelId: string) => { + setError(null); + try { + await deleteDictationLocalModel(modelId); + await refresh(); + await onModelsChanged(); + } catch (err) { + setError( + err instanceof Error + ? err.message + : t("general.voiceInput.deleteError"), + ); + } + }, + [onModelsChanged, refresh, t], + ); + + if (loading) { + return ( +
+      <div>
+        <p>{t("common:labels.loading")}</p>
+      </div>
+    );
+  }
+
+  if (models.length === 0) {
+    return (
+      <div>
+        <p>{t("general.voiceInput.noLocalModels")}</p>
+      </div>
+    );
+  }
+
+  return (
+    <div>
+      <div>
+        <p>{t("general.voiceInput.localModelLabel")}</p>
+        <p>{t("general.voiceInput.localModelDescription")}</p>
+      </div>
+      <ul>
+        {models.map((model) => {
+          const progress = progresses.get(model.id);
+          const isDownloading =
+            downloadingIds.has(model.id) ||
+            progress?.status === "downloading" ||
+            model.downloadInProgress;
+          const isSelected = model.downloaded && model.id === selectedModelId;
+          return (
+            <li key={model.id}>
+              <div>
+                <div>
+                  <span>{model.id}</span>
+                  <span>{model.sizeMb} MB</span>
+                  {isSelected ? (
+                    <span>{t("general.voiceInput.selectedModel")}</span>
+                  ) : null}
+                </div>
+                <p>{model.description}</p>
+                {isDownloading && progress ? (
+                  <div>
+                    <p>
+                      {t("general.voiceInput.downloadProgress", {
+                        percent: Math.round(progress.progressPercent),
+                      })}
+                    </p>
+                  </div>
+                ) : null}
+                {progress?.status === "failed" && progress.error ? (
+                  <p>{progress.error}</p>
+                ) : null}
+              </div>
+              <div>
+                {isDownloading ? (
+                  <Button
+                    type="button"
+                    onClick={() => void cancelDownload(model.id)}
+                  >
+                    {t("common:labels.cancel")}
+                  </Button>
+                ) : model.downloaded ? (
+                  <>
+                    {!isSelected ? (
+                      <Button
+                        type="button"
+                        onClick={() => void onSelectModel(model.id)}
+                      >
+                        {t("general.voiceInput.selectModel")}
+                      </Button>
+                    ) : null}
+                    <Button
+                      type="button"
+                      onClick={() => void deleteModel(model.id)}
+                    >
+                      {t("general.voiceInput.deleteModel")}
+                    </Button>
+                  </>
+                ) : (
+                  <Button
+                    type="button"
+                    onClick={() => void startDownload(model.id)}
+                  >
+                    {t("general.voiceInput.download")}
+                  </Button>
+                )}
+              </div>
+            </li>
+          );
+        })}
+      </ul>
+      {error ? <p>{error}</p> : null}
+    </div>
+ ); +} diff --git a/ui/goose2/src/features/settings/ui/SettingsModal.tsx b/ui/goose2/src/features/settings/ui/SettingsModal.tsx index 65ab6b6aff76..03400ccef214 100644 --- a/ui/goose2/src/features/settings/ui/SettingsModal.tsx +++ b/ui/goose2/src/features/settings/ui/SettingsModal.tsx @@ -21,6 +21,7 @@ import { SelectValue, } from "@/shared/ui/select"; import { + Mic, Palette, Settings2, FolderKanban, @@ -34,6 +35,7 @@ import { AppearanceSettings } from "./AppearanceSettings"; import { DoctorSettings } from "./DoctorSettings"; import { ProvidersSettings } from "./ProvidersSettings"; import { ExtensionsSettings } from "@/features/extensions/ui/ExtensionsSettings"; +import { VoiceInputSettings } from "./VoiceInputSettings"; import { listArchivedProjects, restoreProject, @@ -50,6 +52,7 @@ const NAV_ITEMS = [ { id: "appearance", labelKey: "nav.appearance", icon: Palette }, { id: "providers", labelKey: "nav.providers", icon: IconPlug }, { id: "extensions", labelKey: "nav.extensions", icon: IconPuzzle }, + { id: "voice", labelKey: "nav.voice", icon: Mic }, { id: "general", labelKey: "nav.general", icon: Settings2 }, { id: "projects", labelKey: "nav.projects", icon: FolderKanban }, { id: "chats", labelKey: "nav.chats", icon: MessageSquare }, @@ -241,6 +244,7 @@ export function SettingsModal({ {activeSection === "appearance" && } {activeSection === "providers" && } {activeSection === "extensions" && } + {activeSection === "voice" && } {activeSection === "doctor" && } {activeSection === "general" && (
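// Usage sketch for getDefaultDictationProvider ahead of the settings UI
// below (the `status` helper is illustrative; the import paths are the
// modules added in this patch). Configured providers win in API-first
// order; with nothing configured the chooser falls back local-first.
import { getDefaultDictationProvider } from "@/features/chat/lib/voiceInput";
import type { DictationProviderStatus } from "@/shared/types/dictation";

const status = (configured: boolean): DictationProviderStatus => ({
  configured,
  description: "example",
  usesProviderConfig: false,
  availableModels: [],
});

// groq is the first *configured* provider in priority order, so it wins
// even though local is configured too:
getDefaultDictationProvider({ groq: status(true), local: status(true) }); // "groq"

// with nothing configured, the fallback order prefers local, so a fresh
// install with only the bundled local provider still gets a default:
getDefaultDictationProvider({ openai: status(false), local: status(false) }); // "local"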
diff --git a/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx new file mode 100644 index 000000000000..3203608c1d8d --- /dev/null +++ b/ui/goose2/src/features/settings/ui/VoiceInputSettings.tsx @@ -0,0 +1,489 @@ +import { useCallback, useEffect, useMemo, useState } from "react"; +import { useTranslation } from "react-i18next"; +import { + deleteDictationProviderSecret, + getDictationConfig, + saveDictationModelSelection, + saveDictationProviderSecret, +} from "@/shared/api/dictation"; +import { + notifyVoiceDictationConfigChanged, + getDefaultDictationProvider, +} from "@/features/chat/lib/voiceInput"; +import { useVoiceInputPreferences } from "@/features/chat/hooks/useVoiceInputPreferences"; +import type { + DictationProvider, + DictationProviderStatus, +} from "@/shared/types/dictation"; +import { useAudioDevices } from "@/shared/ui/ai-elements/mic-selector"; +import { Button } from "@/shared/ui/button"; +import { LocalWhisperModels } from "./LocalWhisperModels"; +import { Input } from "@/shared/ui/input"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/shared/ui/select"; + +const DISABLED_PROVIDER = "__disabled__"; + +export function VoiceInputSettings() { + const { t } = useTranslation(["settings", "chat", "common"]); + const { + clearSelectedProvider, + hasStoredProviderPreference, + isHydrated: voicePrefsHydrated, + preferredMicrophoneId, + rawAutoSubmitPhrases, + selectedProvider, + setPreferredMicrophoneId, + setRawAutoSubmitPhrases, + setSelectedProvider, + } = useVoiceInputPreferences(); + const [providerStatuses, setProviderStatuses] = useState< + Record + >({} as Record); + const [apiKeyInput, setApiKeyInput] = useState(""); + const [isEditingApiKey, setIsEditingApiKey] = useState(false); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const { + devices, + error: devicesError, + hasPermission, + loadDevices, + loading: loadingDevices, + } = useAudioDevices(); + const isMicrophoneSupported = + typeof navigator !== "undefined" && !!navigator.mediaDevices; + const permissionStatus = hasPermission ? "authorized" : "not_determined"; + const requestPermission = loadDevices; + + const refreshConfig = useCallback(async () => { + const nextConfig = await getDictationConfig(); + setProviderStatuses(nextConfig); + + // Wait for useVoiceInputPreferences to finish loading the stored value + // from goose config before deciding whether to auto-select a default. + // Otherwise the initial mount sees hasStoredProviderPreference=false + // (pre-hydration default) and clobbers the user's saved choice. + if (!voicePrefsHydrated) { + return; + } + + if (!hasStoredProviderPreference) { + const defaultProvider = getDefaultDictationProvider(nextConfig); + if (defaultProvider) { + setSelectedProvider(defaultProvider); + } + return; + } + + if (!selectedProvider) { + return; + } + + // The stored provider is no longer in the fetched config (e.g. it was + // feature-flagged off or removed). Clear the preference entirely rather + // than writing `null`, which would persist the explicit "voice off" + // sentinel and leave the user opted out across future sessions even + // after valid providers reappear. 
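// How a config change propagates between the two sides of this patch,
// reduced to a sketch (the event name and functions are the ones defined
// in voiceInput.ts and useVoiceDictation.ts):
//
//   settings side                            chat side
//   -------------                            ---------
//   await saveDictationProviderSecret(...)   window.addEventListener(
//   notifyVoiceDictationConfigChanged()        VOICE_DICTATION_CONFIG_EVENT,
//                                              fetchDictationConfig)
//
// The guard just below performs the stale-preference cleanup the comment
// above describes.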
+ if (!nextConfig[selectedProvider]) { + clearSelectedProvider(); + } + }, [ + clearSelectedProvider, + hasStoredProviderPreference, + selectedProvider, + setSelectedProvider, + voicePrefsHydrated, + ]); + + useEffect(() => { + const load = async () => { + setLoading(true); + setError(null); + + try { + await refreshConfig(); + } catch (caughtError) { + setError( + caughtError instanceof Error + ? caughtError.message + : t("general.voiceInput.loadError"), + ); + } finally { + setLoading(false); + } + }; + + void load(); + }, [refreshConfig, t]); + + const selectedStatus = selectedProvider + ? providerStatuses[selectedProvider] + : null; + + const providerOptions = useMemo( + () => + Object.entries(providerStatuses) as Array< + [DictationProvider, DictationProviderStatus] + >, + [providerStatuses], + ); + + const currentModelValue = + selectedStatus?.selectedModel ?? selectedStatus?.defaultModel ?? ""; + + const saveApiKey = useCallback(async () => { + if (!selectedProvider) { + return; + } + + setError(null); + try { + await saveDictationProviderSecret( + selectedProvider, + apiKeyInput, + selectedStatus?.configKey ?? undefined, + ); + setApiKeyInput(""); + setIsEditingApiKey(false); + await refreshConfig(); + notifyVoiceDictationConfigChanged(); + } catch (caughtError) { + setError( + caughtError instanceof Error + ? caughtError.message + : t("general.voiceInput.saveError"), + ); + } + }, [apiKeyInput, refreshConfig, selectedProvider, selectedStatus, t]); + + const removeApiKey = useCallback(async () => { + if (!selectedProvider) { + return; + } + + setError(null); + try { + await deleteDictationProviderSecret( + selectedProvider, + selectedStatus?.configKey ?? undefined, + ); + setApiKeyInput(""); + setIsEditingApiKey(false); + await refreshConfig(); + notifyVoiceDictationConfigChanged(); + } catch (caughtError) { + setError( + caughtError instanceof Error + ? caughtError.message + : t("general.voiceInput.deleteError"), + ); + } + }, [refreshConfig, selectedProvider, selectedStatus, t]); + + const handleModelChange = useCallback( + async (modelId: string) => { + if (!selectedProvider) { + return; + } + + setError(null); + try { + await saveDictationModelSelection(selectedProvider, modelId); + await refreshConfig(); + notifyVoiceDictationConfigChanged(); + } catch (caughtError) { + setError( + caughtError instanceof Error + ? caughtError.message + : t("general.voiceInput.saveError"), + ); + } + }, + [refreshConfig, selectedProvider, t], + ); + + const selectedMicrophoneLabel = useMemo(() => { + if (!preferredMicrophoneId) { + return t("general.voiceInput.systemMicrophone"); + } + + return ( + devices.find((device) => device.deviceId === preferredMicrophoneId) + ?.label || t("general.voiceInput.systemMicrophone") + ); + }, [devices, preferredMicrophoneId, t]); + + if (loading) { + return ( +
+      <div>
+        <p>{t("general.voiceInput.label")}</p>
+        <p>{t("common:labels.loading")}</p>
+      </div>
+    );
+  }
+
+  return (
+    <div>
+      <div>
+        <p>{t("general.voiceInput.label")}</p>
+        <p>{t("general.voiceInput.description")}</p>
+      </div>
+
+      <div>
+        <p>{t("general.voiceInput.providerLabel")}</p>
+        <Select
+          value={selectedProvider ?? DISABLED_PROVIDER}
+          onValueChange={(value) =>
+            setSelectedProvider(
+              value === DISABLED_PROVIDER
+                ? null
+                : (value as DictationProvider),
+            )
+          }
+        >
+          <SelectTrigger>
+            <SelectValue placeholder={t("general.voiceInput.placeholder")} />
+          </SelectTrigger>
+          <SelectContent>
+            <SelectItem value={DISABLED_PROVIDER}>
+              {t("general.voiceInput.disabled")}
+            </SelectItem>
+            {providerOptions.map(([provider, providerStatus]) => (
+              <SelectItem key={provider} value={provider}>
+                {t(`general.voiceInput.providers.${provider}`)}
+                {providerStatus.configured
+                  ? ""
+                  : ` ${t("general.voiceInput.notConfiguredSuffix")}`}
+              </SelectItem>
+            ))}
+          </SelectContent>
+        </Select>
+      </div>
+
+      <div>
+        <div>
+          <p>{t("general.voiceInput.microphoneLabel")}</p>
+          <p>
+            {isMicrophoneSupported
+              ? t("general.voiceInput.microphoneDescription")
+              : t("general.voiceInput.microphoneUnavailable")}
+          </p>
+        </div>
+        {isMicrophoneSupported && !hasPermission ? (
+          <Button
+            type="button"
+            onClick={() => void requestPermission()}
+            disabled={loadingDevices}
+          >
+            {t("general.voiceInput.grantMicrophone")}
+          </Button>
+        ) : null}
+
+        {!devicesError &&
+        !hasPermission &&
+        permissionStatus === "not_determined" ? (
+          <p>{t("general.voiceInput.microphoneAccessPrompt")}</p>
+        ) : null}
+
+        {devicesError ? <p>{devicesError}</p> : null}
+
+        {isMicrophoneSupported && hasPermission ? (
+          <Select
+            value={preferredMicrophoneId ?? undefined}
+            onValueChange={(value) => setPreferredMicrophoneId(value)}
+          >
+            <SelectTrigger>
+              <SelectValue>{selectedMicrophoneLabel}</SelectValue>
+            </SelectTrigger>
+            <SelectContent>
+              {devices.map((device) => (
+                <SelectItem key={device.deviceId} value={device.deviceId}>
+                  {device.label || t("general.voiceInput.unknownMicrophone")}
+                </SelectItem>
+              ))}
+            </SelectContent>
+          </Select>
+        ) : null}
+      </div>
+
+      {selectedStatus ? (
+        <>
+          {!selectedStatus.usesProviderConfig &&
+          selectedProvider !== "local" ? (
+            <div>
+              {isEditingApiKey ? (
+                <>
+                  <div>
+                    <p>{t("general.voiceInput.apiKeyLabel")}</p>
+                    <p>{t("general.voiceInput.apiKeyDescription")}</p>
+                  </div>
+                  <div>
+                    <Input
+                      value={apiKeyInput}
+                      onChange={(event) => setApiKeyInput(event.target.value)}
+                      placeholder={t("general.voiceInput.apiKeyPlaceholder")}
+                      className="max-w-sm"
+                    />
+                    <div>
+                      <Button type="button" onClick={() => void saveApiKey()}>
+                        {selectedStatus.configured
+                          ? t("general.voiceInput.updateApiKey")
+                          : t("general.voiceInput.addApiKey")}
+                      </Button>
+                      <Button
+                        type="button"
+                        onClick={() => setIsEditingApiKey(false)}
+                      >
+                        {t("common:labels.cancel")}
+                      </Button>
+                    </div>
+                  </div>
+                </>
+              ) : (
+                <div>
+                  <div>
+                    <p>{t("general.voiceInput.apiKeyLabel")}</p>
+                    <p>
+                      {selectedStatus.configured
+                        ? t("general.voiceInput.apiKeyConfigured")
+                        : t("general.voiceInput.apiKeyDescription")}
+                    </p>
+                  </div>
+                  <div>
+                    <Button
+                      type="button"
+                      onClick={() => setIsEditingApiKey(true)}
+                    >
+                      {selectedStatus.configured
+                        ? t("general.voiceInput.updateApiKey")
+                        : t("general.voiceInput.addApiKey")}
+                    </Button>
+                    {selectedStatus.configured ? (
+                      <Button
+                        type="button"
+                        onClick={() => void removeApiKey()}
+                      >
+                        {t("general.voiceInput.removeApiKey")}
+                      </Button>
+                    ) : null}
+                  </div>
+                </div>
+              )}
+            </div>
+          ) : null}
+
+          {selectedProvider === "local" ? (
+            <LocalWhisperModels
+              selectedModelId={currentModelValue}
+              onSelectModel={(modelId) => handleModelChange(modelId)}
+              onModelsChanged={async () => {
+                await refreshConfig();
+                notifyVoiceDictationConfigChanged();
+              }}
+            />
+          ) : (selectedStatus.availableModels ?? []).length > 0 ? (
+            <div>
+              <p>{t("general.voiceInput.modelLabel")}</p>
+              <Select
+                value={currentModelValue}
+                onValueChange={(value) => void handleModelChange(value)}
+              >
+                <SelectTrigger>
+                  <SelectValue />
+                </SelectTrigger>
+                <SelectContent>
+                  {(selectedStatus.availableModels ?? []).map((model) => (
+                    <SelectItem key={model.id} value={model.id}>
+                      {model.label}
+                    </SelectItem>
+                  ))}
+                </SelectContent>
+              </Select>
+              <p>
+                {(selectedStatus.availableModels ?? []).find(
+                  (model) => model.id === currentModelValue,
+                )?.description ?? ""}
+              </p>
+            </div>
+          ) : null}
+        </>
+      ) : null}
+
+      <div>
+        <p>{t("general.voiceInput.autoSubmitLabel")}</p>
+        <p>{t("general.voiceInput.autoSubmitDescription")}</p>
+        <Input
+          value={rawAutoSubmitPhrases}
+          onChange={(event) => setRawAutoSubmitPhrases(event.target.value)}
+          placeholder={t("general.voiceInput.placeholder")}
+          className="max-w-sm"
+        />
+      </div>
+
+      {error ? <p>{error}</p> : null}
+    </div>
+ ); +} diff --git a/ui/goose2/src/features/settings/ui/__tests__/LocalWhisperModels.test.tsx b/ui/goose2/src/features/settings/ui/__tests__/LocalWhisperModels.test.tsx new file mode 100644 index 000000000000..b79c34577351 --- /dev/null +++ b/ui/goose2/src/features/settings/ui/__tests__/LocalWhisperModels.test.tsx @@ -0,0 +1,106 @@ +import { render, screen, waitFor } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { LocalWhisperModels } from "../LocalWhisperModels"; + +const mockListDictationLocalModels = vi.fn(); +const mockDownloadDictationLocalModel = vi.fn(); +const mockGetDictationLocalModelDownloadProgress = vi.fn(); +const mockCancelDictationLocalModelDownload = vi.fn(); +const mockDeleteDictationLocalModel = vi.fn(); + +vi.mock("@/shared/api/dictation", () => ({ + listDictationLocalModels: (...args: unknown[]) => + mockListDictationLocalModels(...args), + downloadDictationLocalModel: (...args: unknown[]) => + mockDownloadDictationLocalModel(...args), + getDictationLocalModelDownloadProgress: (...args: unknown[]) => + mockGetDictationLocalModelDownloadProgress(...args), + cancelDictationLocalModelDownload: (...args: unknown[]) => + mockCancelDictationLocalModelDownload(...args), + deleteDictationLocalModel: (...args: unknown[]) => + mockDeleteDictationLocalModel(...args), +})); + +describe("LocalWhisperModels", () => { + beforeEach(() => { + mockListDictationLocalModels.mockReset(); + mockDownloadDictationLocalModel.mockReset(); + mockGetDictationLocalModelDownloadProgress.mockReset(); + mockCancelDictationLocalModelDownload.mockReset(); + mockDeleteDictationLocalModel.mockReset(); + }); + + it("clears cached progress when cancelling a download", async () => { + const user = userEvent.setup(); + const onModelsChanged = vi.fn(); + + mockListDictationLocalModels + .mockResolvedValueOnce([ + { + id: "tiny", + description: "Tiny model", + sizeMb: 75, + downloaded: false, + downloadInProgress: true, + }, + ]) + .mockResolvedValueOnce([ + { + id: "tiny", + description: "Tiny model", + sizeMb: 75, + downloaded: false, + downloadInProgress: false, + }, + ]); + mockGetDictationLocalModelDownloadProgress.mockResolvedValue({ + bytesDownloaded: 100, + totalBytes: 1000, + progressPercent: 10, + status: "downloading", + error: null, + }); + mockCancelDictationLocalModelDownload.mockResolvedValue(undefined); + + render( + , + ); + + await waitFor(() => + expect( + screen.getByRole("button", { name: /cancel/i }), + ).toBeInTheDocument(), + ); + + await waitFor( + () => + expect(mockGetDictationLocalModelDownloadProgress).toHaveBeenCalledWith( + "tiny", + ), + { timeout: 2000 }, + ); + + await user.click(screen.getByRole("button", { name: /cancel/i })); + + await waitFor(() => + expect(mockCancelDictationLocalModelDownload).toHaveBeenCalledWith( + "tiny", + ), + ); + await waitFor(() => + expect( + screen.getByRole("button", { name: /download/i }), + ).toBeInTheDocument(), + ); + + expect( + screen.queryByRole("button", { name: /cancel/i }), + ).not.toBeInTheDocument(); + expect(onModelsChanged).not.toHaveBeenCalled(); + }); +}); diff --git a/ui/goose2/src/shared/api/__tests__/dictation.test.ts b/ui/goose2/src/shared/api/__tests__/dictation.test.ts new file mode 100644 index 000000000000..79831ca7cc92 --- /dev/null +++ b/ui/goose2/src/shared/api/__tests__/dictation.test.ts @@ -0,0 +1,140 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { + 
cancelDictationLocalModelDownload, + deleteDictationLocalModel, + downloadDictationLocalModel, + getDictationConfig, + getDictationLocalModelDownloadProgress, + listDictationLocalModels, + saveDictationModelSelection, + transcribeDictation, +} from "../dictation"; +import { getClient } from "../acpConnection"; + +vi.mock("../acpConnection", () => ({ + getClient: vi.fn(), +})); + +describe("dictation SDK wiring", () => { + let client: any; + beforeEach(() => { + client = { + goose: { + GooseDictationConfig: vi.fn().mockResolvedValue({ + providers: { + openai: { + configured: true, + description: "OpenAI transcription", + usesProviderConfig: true, + availableModels: [], + }, + }, + }), + GooseDictationTranscribe: vi.fn().mockResolvedValue({ text: "hello" }), + }, + }; + vi.mocked(getClient).mockResolvedValue(client); + }); + + it("getDictationConfig calls GooseDictationConfig and returns providers map", async () => { + const result = await getDictationConfig(); + expect(client.goose.GooseDictationConfig).toHaveBeenCalledWith({}); + expect(result.openai.configured).toBe(true); + }); + + it("transcribeDictation forwards audio + mimeType + provider", async () => { + const result = await transcribeDictation({ + audio: "base64==", + mimeType: "audio/webm", + provider: "openai" as any, + }); + expect(client.goose.GooseDictationTranscribe).toHaveBeenCalledWith({ + audio: "base64==", + mimeType: "audio/webm", + provider: "openai", + }); + expect(result.text).toBe("hello"); + }); + + it("saveDictationModelSelection calls GooseDictationModelSelect", async () => { + client.goose.GooseDictationModelSelect = vi.fn().mockResolvedValue({}); + await saveDictationModelSelection("local" as any, "tiny"); + expect(client.goose.GooseDictationModelSelect).toHaveBeenCalledWith({ + provider: "local", + modelId: "tiny", + }); + }); + + it("listDictationLocalModels returns the models array", async () => { + client.goose.GooseDictationModelsList = vi.fn().mockResolvedValue({ + models: [ + { + id: "tiny", + description: "Tiny", + sizeMb: 75, + downloaded: true, + downloadInProgress: false, + }, + ], + }); + const result = await listDictationLocalModels(); + expect(client.goose.GooseDictationModelsList).toHaveBeenCalledWith({}); + expect(result).toHaveLength(1); + expect(result[0].id).toBe("tiny"); + }); + + it("downloadDictationLocalModel forwards modelId", async () => { + client.goose.GooseDictationModelsDownload = vi.fn().mockResolvedValue({}); + await downloadDictationLocalModel("tiny"); + expect(client.goose.GooseDictationModelsDownload).toHaveBeenCalledWith({ + modelId: "tiny", + }); + }); + + it("getDictationLocalModelDownloadProgress returns progress or null", async () => { + client.goose.GooseDictationModelsDownloadProgress = vi + .fn() + .mockResolvedValue({ + progress: { + bytesDownloaded: 100, + totalBytes: 1000, + progressPercent: 10, + status: "downloading", + error: null, + }, + }); + const result = await getDictationLocalModelDownloadProgress("tiny"); + expect(result?.bytesDownloaded).toBe(100); + expect( + client.goose.GooseDictationModelsDownloadProgress, + ).toHaveBeenCalledWith({ + modelId: "tiny", + }); + }); + + it("getDictationLocalModelDownloadProgress returns null when no download", async () => { + client.goose.GooseDictationModelsDownloadProgress = vi + .fn() + .mockResolvedValue({ + progress: undefined, + }); + const result = await getDictationLocalModelDownloadProgress("tiny"); + expect(result).toBeNull(); + }); + + it("cancelDictationLocalModelDownload forwards modelId", async () => { + 
client.goose.GooseDictationModelsCancel = vi.fn().mockResolvedValue({}); + await cancelDictationLocalModelDownload("tiny"); + expect(client.goose.GooseDictationModelsCancel).toHaveBeenCalledWith({ + modelId: "tiny", + }); + }); + + it("deleteDictationLocalModel forwards modelId", async () => { + client.goose.GooseDictationModelsDelete = vi.fn().mockResolvedValue({}); + await deleteDictationLocalModel("tiny"); + expect(client.goose.GooseDictationModelsDelete).toHaveBeenCalledWith({ + modelId: "tiny", + }); + }); +}); diff --git a/ui/goose2/src/shared/api/dictation.ts b/ui/goose2/src/shared/api/dictation.ts new file mode 100644 index 000000000000..4c3b42c6e140 --- /dev/null +++ b/ui/goose2/src/shared/api/dictation.ts @@ -0,0 +1,106 @@ +import { invoke } from "@tauri-apps/api/core"; +import type { + DictationDownloadProgress, + DictationProvider, + DictationProviderStatus, + DictationTranscribeResponse, + WhisperModelStatus, +} from "@/shared/types/dictation"; +import { getClient } from "./acpConnection"; + +export async function getDictationConfig(): Promise< + Record +> { + const client = await getClient(); + const response = await client.goose.GooseDictationConfig({}); + return response.providers as Record< + DictationProvider, + DictationProviderStatus + >; +} + +export async function transcribeDictation(request: { + audio: string; + mimeType: string; + provider: DictationProvider; +}): Promise { + const client = await getClient(); + return client.goose.GooseDictationTranscribe({ + audio: request.audio, + mimeType: request.mimeType, + provider: request.provider, + }); +} + +export async function saveDictationModelSelection( + provider: DictationProvider, + modelId: string, +): Promise { + const client = await getClient(); + await client.goose.GooseDictationModelSelect({ provider, modelId }); +} + +export async function saveDictationProviderSecret( + _provider: DictationProvider, + value: string, + configKey?: string, +): Promise { + if (!configKey) { + throw new Error("No config key for this provider"); + } + return invoke("save_provider_field", { key: configKey, value }); +} + +export async function deleteDictationProviderSecret( + provider: DictationProvider, + _configKey?: string, +): Promise { + const providerIdMap: Record = { + groq: "dictation_groq", + elevenlabs: "dictation_elevenlabs", + }; + const providerId = providerIdMap[provider]; + if (!providerId) { + throw new Error("Cannot delete secrets for this provider"); + } + return invoke("delete_provider_config", { providerId }); +} + +export async function listDictationLocalModels(): Promise< + WhisperModelStatus[] +> { + const client = await getClient(); + const response = await client.goose.GooseDictationModelsList({}); + return response.models as unknown as WhisperModelStatus[]; +} + +export async function downloadDictationLocalModel( + modelId: string, +): Promise { + const client = await getClient(); + await client.goose.GooseDictationModelsDownload({ modelId }); +} + +export async function getDictationLocalModelDownloadProgress( + modelId: string, +): Promise { + const client = await getClient(); + const response = await client.goose.GooseDictationModelsDownloadProgress({ + modelId, + }); + return (response.progress ?? 
null) as DictationDownloadProgress | null; +} + +export async function cancelDictationLocalModelDownload( + modelId: string, +): Promise { + const client = await getClient(); + await client.goose.GooseDictationModelsCancel({ modelId }); +} + +export async function deleteDictationLocalModel( + modelId: string, +): Promise { + const client = await getClient(); + await client.goose.GooseDictationModelsDelete({ modelId }); +} diff --git a/ui/goose2/src/shared/i18n/locales/en/chat.json b/ui/goose2/src/shared/i18n/locales/en/chat.json index efe6776e3d87..424007cc8c5c 100644 --- a/ui/goose2/src/shared/i18n/locales/en/chat.json +++ b/ui/goose2/src/shared/i18n/locales/en/chat.json @@ -169,7 +169,11 @@ "selectProject": "Select project", "sendMessage": "Send message", "stopGeneration": "Stop generation", - "voiceInputSoon": "Voice input (coming soon)" + "voiceInput": "Voice dictation", + "voiceInputDisabled": "Configure a voice provider in Settings to enable dictation", + "voiceInputRecording": "Listening...", + "voiceInputTranscribing": "Transcribing...", + "voiceInputAutoSubmitHint": "Say \"submit\" to send" }, "tools": { "fileNotFound": "File not found: {{path}}", diff --git a/ui/goose2/src/shared/i18n/locales/en/settings.json b/ui/goose2/src/shared/i18n/locales/en/settings.json index be55f4766a1d..d9733f3800b5 100644 --- a/ui/goose2/src/shared/i18n/locales/en/settings.json +++ b/ui/goose2/src/shared/i18n/locales/en/settings.json @@ -124,7 +124,49 @@ "spanish": "Spanish", "system": "System default ({{language}})" }, - "title": "General" + "title": "General", + "voiceInput": { + "label": "Voice Input", + "description": "Configure voice dictation for hands-free input.", + "providerLabel": "Transcription Provider", + "disabled": "Disabled", + "notConfiguredSuffix": "(not configured)", + "placeholder": "Select a provider", + "modelLabel": "Model", + "apiKeyLabel": "API Key", + "apiKeyDescription": "Enter your API key for this provider.", + "apiKeyPlaceholder": "sk-...", + "apiKeyConfigured": "API key configured", + "addApiKey": "Add API key", + "updateApiKey": "Update API key", + "removeApiKey": "Remove API key", + "localModelLabel": "Local Whisper Model", + "localModelDescription": "Download a Whisper model to run transcription locally. Selecting a model sets it as your active local transcription model.", + "noLocalModels": "No local Whisper models available.", + "download": "Download", + "selectModel": "Select", + "selectedModel": "Selected", + "deleteModel": "Delete", + "microphoneLabel": "Microphone", + "microphoneDescription": "Choose which microphone to use for voice input.", + "microphoneUnavailable": "Microphone access is not available in this environment.", + "microphoneAccessPrompt": "Click \"Grant access\" to allow microphone use.", + "grantMicrophone": "Grant access", + "systemMicrophone": "System default", + "unknownMicrophone": "Unknown microphone", + "autoSubmitLabel": "Auto-submit Phrases", + "autoSubmitDescription": "Comma-separated words that trigger automatic send (e.g. \"submit\").", + "providers": { + "openai": "OpenAI Whisper", + "groq": "Groq", + "elevenlabs": "ElevenLabs", + "local": "Local Whisper" + }, + "downloadProgress": "Downloading... {{percent}}%", + "loadError": "Failed to load voice settings.", + "saveError": "Failed to save.", + "deleteError": "Failed to delete." 
diff --git a/ui/goose2/src/shared/i18n/locales/es/chat.json b/ui/goose2/src/shared/i18n/locales/es/chat.json
index 3a5760189e23..5bd93d8a560d 100644
--- a/ui/goose2/src/shared/i18n/locales/es/chat.json
+++ b/ui/goose2/src/shared/i18n/locales/es/chat.json
@@ -169,7 +169,11 @@
     "selectProject": "Seleccionar proyecto",
     "sendMessage": "Enviar mensaje",
     "stopGeneration": "Detener generación",
-    "voiceInputSoon": "Entrada de voz (pronto)"
+    "voiceInput": "Dictado por voz",
+    "voiceInputDisabled": "Configura un proveedor de voz en Ajustes para activar el dictado",
+    "voiceInputRecording": "Escuchando...",
+    "voiceInputTranscribing": "Transcribiendo...",
+    "voiceInputAutoSubmitHint": "Di \"enviar\" para enviar"
   },
   "tools": {
     "fileNotFound": "Archivo no encontrado: {{path}}",
diff --git a/ui/goose2/src/shared/i18n/locales/es/settings.json b/ui/goose2/src/shared/i18n/locales/es/settings.json
index 8b2b85236ece..16e33a960aa6 100644
--- a/ui/goose2/src/shared/i18n/locales/es/settings.json
+++ b/ui/goose2/src/shared/i18n/locales/es/settings.json
@@ -124,7 +124,49 @@
       "spanish": "Español",
       "system": "Predeterminado del sistema ({{language}})"
     },
-    "title": "General"
+    "title": "General",
+    "voiceInput": {
+      "label": "Entrada de voz",
+      "description": "Configura el dictado por voz para entrada manos libres.",
+      "providerLabel": "Proveedor de transcripción",
+      "disabled": "Desactivado",
+      "notConfiguredSuffix": "(no configurado)",
+      "placeholder": "Selecciona un proveedor",
+      "modelLabel": "Modelo",
+      "apiKeyLabel": "Clave API",
+      "apiKeyDescription": "Ingresa tu clave API para este proveedor.",
+      "apiKeyPlaceholder": "sk-...",
+      "apiKeyConfigured": "Clave API configurada",
+      "addApiKey": "Agregar clave API",
+      "updateApiKey": "Actualizar clave API",
+      "removeApiKey": "Eliminar clave API",
+      "localModelLabel": "Modelo Whisper local",
+      "localModelDescription": "Descarga un modelo Whisper para transcribir localmente. Seleccionar un modelo lo establece como tu modelo de transcripción local activo.",
+      "noLocalModels": "No hay modelos Whisper locales disponibles.",
+      "download": "Descargar",
+      "selectModel": "Seleccionar",
+      "selectedModel": "Seleccionado",
+      "deleteModel": "Eliminar",
+      "microphoneLabel": "Micrófono",
+      "microphoneDescription": "Elige qué micrófono usar para la entrada de voz.",
+      "microphoneUnavailable": "El acceso al micrófono no está disponible en este entorno.",
+      "microphoneAccessPrompt": "Haz clic en \"Permitir acceso\" para usar el micrófono.",
+      "grantMicrophone": "Permitir acceso",
+      "systemMicrophone": "Predeterminado del sistema",
+      "unknownMicrophone": "Micrófono desconocido",
+      "autoSubmitLabel": "Frases de envío automático",
+      "autoSubmitDescription": "Palabras separadas por coma que activan el envío automático (ej. \"enviar\").",
+      "providers": {
+        "openai": "OpenAI Whisper",
+        "groq": "Groq",
+        "elevenlabs": "ElevenLabs",
+        "local": "Whisper local"
+      },
+      "downloadProgress": "Descargando... {{percent}}%",
+      "loadError": "Error al cargar ajustes de voz.",
+      "saveError": "Error al guardar.",
+      "deleteError": "Error al eliminar."
+    }
   },
   "nav": {
     "about": "Acerca de",
@@ -134,7 +176,8 @@
     "general": "General",
     "projects": "Proyectos",
     "extensions": "Extensiones",
-    "providers": "Proveedores"
+    "providers": "Proveedores",
+    "voice": "Voz"
   },
   "projects": {
     "description": "Administra tus proyectos.",
diff --git a/ui/goose2/src/shared/types/dictation.ts b/ui/goose2/src/shared/types/dictation.ts
new file mode 100644
index 000000000000..f27593506772
--- /dev/null
+++ b/ui/goose2/src/shared/types/dictation.ts
@@ -0,0 +1,47 @@
+export type DictationProvider = "openai" | "groq" | "elevenlabs" | "local";
+
+export interface DictationModelOption {
+  id: string;
+  label: string;
+  description: string;
+}
+
+export interface DictationProviderStatus {
+  configured: boolean;
+  host?: string | null;
+  description: string;
+  usesProviderConfig: boolean;
+  settingsPath?: string | null;
+  configKey?: string | null;
+  modelConfigKey?: string | null;
+  defaultModel?: string | null;
+  selectedModel?: string | null;
+  availableModels: DictationModelOption[];
+}
+
+export interface DictationTranscribeResponse {
+  text: string;
+}
+
+export type MicrophonePermissionStatus =
+  | "not_determined"
+  | "authorized"
+  | "denied"
+  | "restricted"
+  | "unsupported";
+
+export interface WhisperModelStatus {
+  id: string;
+  sizeMb: number;
+  description: string;
+  downloaded: boolean;
+  downloadInProgress: boolean;
+}
+
+export interface DictationDownloadProgress {
+  bytesDownloaded: number;
+  totalBytes: number;
+  progressPercent: number;
+  status: string;
+  error?: string | null;
+}
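The optional and nullable fields on DictationProviderStatus leave model resolution to the consumer. A hypothetical helper showing one plausible fallback order (user selection, then provider default, then first advertised model); the function is not part of the diff:

```ts
import type { DictationProviderStatus } from "@/shared/types/dictation";

// Illustrative sketch: pick the model id a settings screen might display.
// The fallback order is an assumption, not something the diff prescribes.
export function resolveActiveModel(
  status: DictationProviderStatus,
): string | null {
  return (
    status.selectedModel ??
    status.defaultModel ??
    status.availableModels[0]?.id ??
    null
  );
}
```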
diff --git a/ui/goose2/src/shared/ui/ai-elements/mic-selector.tsx b/ui/goose2/src/shared/ui/ai-elements/mic-selector.tsx
index 9e0369e135e3..0a05fc705b59 100644
--- a/ui/goose2/src/shared/ui/ai-elements/mic-selector.tsx
+++ b/ui/goose2/src/shared/ui/ai-elements/mic-selector.tsx
@@ -74,10 +74,6 @@ export const useAudioDevices = () => {
   }, []);
 
   const loadDevicesWithPermission = useCallback(async () => {
-    if (loading) {
-      return;
-    }
-
     try {
       setLoading(true);
       setError(null);
@@ -108,11 +104,57 @@ export const useAudioDevices = () => {
     } finally {
       setLoading(false);
     }
-  }, [loading]);
+  }, []);
 
   useEffect(() => {
-    loadDevicesWithoutPermission();
-  }, [loadDevicesWithoutPermission]);
+    let cancelled = false;
+    let status: PermissionStatus | null = null;
+    const onChange = () => {
+      if (cancelled || !status) return;
+      const granted = status.state === "granted";
+      setHasPermission(granted);
+      // When permission flips to granted mid-session (e.g. the user enabled
+      // mic access via OS settings), re-enumerate devices so we pick up the
+      // real deviceIds/labels — the prior enumeration may have returned
+      // empty-string entries that VoiceInputSettings filters out.
+      if (granted) {
+        void loadDevicesWithPermission();
+      }
+    };
+
+    const init = async () => {
+      let alreadyGranted = false;
+      try {
+        status = await navigator.permissions.query({
+          name: "microphone" as PermissionName,
+        });
+        if (cancelled) return;
+        alreadyGranted = status.state === "granted";
+        setHasPermission(alreadyGranted);
+        status.addEventListener("change", onChange);
+      } catch {
+        // Permissions API not available for microphone; fall back silently.
+      }
+      if (cancelled) return;
+      // If OS-level permission is already granted, enumerate through the
+      // permission-ful path — otherwise enumerateDevices() may return
+      // entries with empty deviceId/label, which Radix Select rejects.
+      if (alreadyGranted) {
+        await loadDevicesWithPermission();
+      } else {
+        await loadDevicesWithoutPermission();
+      }
+    };
+
+    void init();
+
+    return () => {
+      cancelled = true;
+      if (status) {
+        status.removeEventListener("change", onChange);
+      }
+    };
+  }, [loadDevicesWithPermission, loadDevicesWithoutPermission]);
 
   useEffect(() => {
     const handleDeviceChange = () => {
diff --git a/ui/sdk/src/generated/client.gen.ts b/ui/sdk/src/generated/client.gen.ts
index 3bd1e0d13f89..a1eeeee569d3 100644
--- a/ui/sdk/src/generated/client.gen.ts
+++ b/ui/sdk/src/generated/client.gen.ts
@@ -13,6 +13,18 @@ import type {
   CheckSecretRequest,
   CheckSecretResponse,
   DeleteSessionRequest,
+  DictationConfigRequest,
+  DictationConfigResponse,
+  DictationModelCancelRequest,
+  DictationModelDeleteRequest,
+  DictationModelDownloadProgressRequest,
+  DictationModelDownloadProgressResponse,
+  DictationModelDownloadRequest,
+  DictationModelSelectRequest,
+  DictationModelsListRequest,
+  DictationModelsListResponse,
+  DictationTranscribeRequest,
+  DictationTranscribeResponse,
   ExportSessionRequest,
   ExportSessionResponse,
   GetExtensionsRequest,
@@ -43,6 +55,10 @@ import type {
 } from './types.gen.js';
 import {
   zCheckSecretResponse,
+  zDictationConfigResponse,
+  zDictationModelDownloadProgressResponse,
+  zDictationModelsListResponse,
+  zDictationTranscribeResponse,
   zExportSessionResponse,
   zGetExtensionsResponse,
   zGetProviderDetailsResponse,
@@ -174,4 +190,71 @@ export class GooseExtClient {
   async GooseSessionUnarchive(params: UnarchiveSessionRequest): Promise<void> {
     await this.conn.extMethod("_goose/session/unarchive", params);
   }
+
+  async GooseDictationTranscribe(
+    params: DictationTranscribeRequest,
+  ): Promise<DictationTranscribeResponse> {
+    const raw = await this.conn.extMethod(
+      "_goose/dictation/transcribe",
+      params,
+    );
+    return zDictationTranscribeResponse.parse(
+      raw,
+    ) as DictationTranscribeResponse;
+  }
+
+  async GooseDictationConfig(
+    params: DictationConfigRequest,
+  ): Promise<DictationConfigResponse> {
+    const raw = await this.conn.extMethod("_goose/dictation/config", params);
+    return zDictationConfigResponse.parse(raw) as DictationConfigResponse;
+  }
+
+  async GooseDictationModelsList(
+    params: DictationModelsListRequest,
+  ): Promise<DictationModelsListResponse> {
+    const raw = await this.conn.extMethod(
+      "_goose/dictation/models/list",
+      params,
+    );
+    return zDictationModelsListResponse.parse(
+      raw,
+    ) as DictationModelsListResponse;
+  }
+
+  async GooseDictationModelsDownload(
+    params: DictationModelDownloadRequest,
+  ): Promise<void> {
+    await this.conn.extMethod("_goose/dictation/models/download", params);
+  }
+
+  async GooseDictationModelsDownloadProgress(
+    params: DictationModelDownloadProgressRequest,
+  ): Promise<DictationModelDownloadProgressResponse> {
+    const raw = await this.conn.extMethod(
+      "_goose/dictation/models/download/progress",
+      params,
+    );
+    return zDictationModelDownloadProgressResponse.parse(
+      raw,
+    ) as DictationModelDownloadProgressResponse;
+  }
+
+  async GooseDictationModelsCancel(
+    params: DictationModelCancelRequest,
+  ): Promise<void> {
+    await this.conn.extMethod("_goose/dictation/models/cancel", params);
+  }
+
+  async GooseDictationModelsDelete(
+    params: DictationModelDeleteRequest,
+  ): Promise<void> {
+    await this.conn.extMethod("_goose/dictation/models/delete", params);
+  }
+
+  async GooseDictationModelSelect(
+    params: DictationModelSelectRequest,
+  ): Promise<void> {
+    await this.conn.extMethod("_goose/dictation/model/select", params);
+  }
 }
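For the transcribe path, a caller has to turn captured audio into the base64 payload the schema expects. A hypothetical sketch, assuming a MediaRecorder-produced Blob; the chunked encoder, the hard-coded provider, and the webm fallback are illustrative:

```ts
import { transcribeDictation } from "@/shared/api/dictation";

// Hypothetical glue code: encode recorded audio and request a transcript.
// The chunked btoa loop avoids String.fromCharCode(...hugeArray) exceeding
// the engine's argument limit on large recordings.
export async function transcribeBlob(blob: Blob): Promise<string> {
  const bytes = new Uint8Array(await blob.arrayBuffer());
  let binary = "";
  const chunkSize = 0x8000;
  for (let i = 0; i < bytes.length; i += chunkSize) {
    binary += String.fromCharCode(...bytes.subarray(i, i + chunkSize));
  }
  const { text } = await transcribeDictation({
    audio: btoa(binary), // matches the schema's "Base64-encoded audio data"
    mimeType: blob.type || "audio/webm", // assumption: MediaRecorder default
    provider: "openai", // any configured DictationProvider would do
  });
  return text;
}
```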
diff --git a/ui/sdk/src/generated/index.ts b/ui/sdk/src/generated/index.ts
index aa103a439a31..d1886b07d767 100644
--- a/ui/sdk/src/generated/index.ts
+++ b/ui/sdk/src/generated/index.ts
@@ -1,6 +1,6 @@
 // This file is auto-generated by @hey-api/openapi-ts
 
-export type { AddExtensionRequest, ArchiveSessionRequest, CheckSecretRequest, CheckSecretResponse, DeleteSessionRequest, EmptyResponse, ExportSessionRequest, ExportSessionResponse, ExtRequest, ExtResponse, GetExtensionsRequest, GetExtensionsResponse, GetProviderDetailsRequest, GetProviderDetailsResponse, GetProviderModelsRequest, GetProviderModelsResponse, GetSessionExtensionsRequest, GetSessionExtensionsResponse, GetToolsRequest, GetToolsResponse, ImportSessionRequest, ImportSessionResponse, ListProvidersRequest, ListProvidersResponse, ModelEntry, ProviderConfigKey, ProviderDetailEntry, ProviderListEntry, ReadConfigRequest, ReadConfigResponse, ReadResourceRequest, ReadResourceResponse, RemoveConfigRequest, RemoveExtensionRequest, RemoveSecretRequest, UnarchiveSessionRequest, UpdateWorkingDirRequest, UpsertConfigRequest, UpsertSecretRequest } from './types.gen.js';
+export type { AddExtensionRequest, ArchiveSessionRequest, CheckSecretRequest, CheckSecretResponse, DeleteSessionRequest, DictationConfigRequest, DictationConfigResponse, DictationDownloadProgress, DictationLocalModelStatus, DictationModelCancelRequest, DictationModelDeleteRequest, DictationModelDownloadProgressRequest, DictationModelDownloadProgressResponse, DictationModelDownloadRequest, DictationModelOption, DictationModelSelectRequest, DictationModelsListRequest, DictationModelsListResponse, DictationProviderStatusEntry, DictationTranscribeRequest, DictationTranscribeResponse, EmptyResponse, ExportSessionRequest, ExportSessionResponse, ExtRequest, ExtResponse, GetExtensionsRequest, GetExtensionsResponse, GetProviderDetailsRequest, GetProviderDetailsResponse, GetProviderModelsRequest, GetProviderModelsResponse, GetSessionExtensionsRequest, GetSessionExtensionsResponse, GetToolsRequest, GetToolsResponse, ImportSessionRequest, ImportSessionResponse, ListProvidersRequest, ListProvidersResponse, ModelEntry, ProviderConfigKey, ProviderDetailEntry, ProviderListEntry, ReadConfigRequest, ReadConfigResponse, ReadResourceRequest, ReadResourceResponse, RemoveConfigRequest, RemoveExtensionRequest, RemoveSecretRequest, UnarchiveSessionRequest, UpdateWorkingDirRequest, UpsertConfigRequest, UpsertSecretRequest } from './types.gen.js';
 
 export const GOOSE_EXT_METHODS = [
   {
@@ -108,6 +108,46 @@ export const GOOSE_EXT_METHODS = [
     requestType: "UnarchiveSessionRequest",
     responseType: "EmptyResponse",
   },
+  {
+    method: "_goose/dictation/transcribe",
+    requestType: "DictationTranscribeRequest",
+    responseType: "DictationTranscribeResponse",
+  },
+  {
+    method: "_goose/dictation/config",
+    requestType: "DictationConfigRequest",
+    responseType: "DictationConfigResponse",
+  },
+  {
+    method: "_goose/dictation/models/list",
+    requestType: "DictationModelsListRequest",
+    responseType: "DictationModelsListResponse",
+  },
+  {
+    method: "_goose/dictation/models/download",
+    requestType: "DictationModelDownloadRequest",
+    responseType: "EmptyResponse",
+  },
+  {
+    method: "_goose/dictation/models/download/progress",
+    requestType: "DictationModelDownloadProgressRequest",
+    responseType: "DictationModelDownloadProgressResponse",
+  },
+  {
+    method: "_goose/dictation/models/cancel",
+    requestType: "DictationModelCancelRequest",
+    responseType: "EmptyResponse",
+  },
+  {
+    method: "_goose/dictation/models/delete",
+    requestType: "DictationModelDeleteRequest",
+    responseType: "EmptyResponse",
+  },
+  {
+    method: "_goose/dictation/model/select",
+    requestType: "DictationModelSelectRequest",
+    responseType: "EmptyResponse",
+  },
 ] as const;
 
 export type GooseExtMethod = (typeof GOOSE_EXT_METHODS)[number];
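Because GOOSE_EXT_METHODS is exported `as const`, its entries carry string-literal types, so consumers can derive the method-name union without maintaining a parallel list. A sketch; the alias, the helper, and the import path are assumptions:

```ts
// Path is illustrative; adjust to wherever the generated index is published.
import { GOOSE_EXT_METHODS, type GooseExtMethod } from "./generated/index.js";

// Hypothetical alias: the union of wire method names, e.g.
// "_goose/dictation/transcribe" | "_goose/session/unarchive" | ...
type GooseExtMethodName = GooseExtMethod["method"];

// Illustrative lookup: the declared response type name for a given method.
function responseTypeOf(method: GooseExtMethodName): string {
  const entry = GOOSE_EXT_METHODS.find((m) => m.method === method);
  return entry?.responseType ?? "EmptyResponse";
}
```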
"DictationModelSelectRequest", + responseType: "EmptyResponse", + }, ] as const; export type GooseExtMethod = (typeof GOOSE_EXT_METHODS)[number]; diff --git a/ui/sdk/src/generated/types.gen.ts b/ui/sdk/src/generated/types.gen.ts index e27160830133..15cf78ea75a7 100644 --- a/ui/sdk/src/generated/types.gen.ts +++ b/ui/sdk/src/generated/types.gen.ts @@ -281,17 +281,154 @@ export type UnarchiveSessionRequest = { sessionId: string; }; +/** + * Transcribe audio via a dictation provider. + */ +export type DictationTranscribeRequest = { + /** + * Base64-encoded audio data + */ + audio: string; + /** + * MIME type (e.g. "audio/wav", "audio/webm") + */ + mimeType: string; + /** + * Provider to use: "openai", "groq", "elevenlabs", or "local" + */ + provider: string; +}; + +/** + * Transcription result. + */ +export type DictationTranscribeResponse = { + text: string; +}; + +/** + * Get the configuration status of all dictation providers. + */ +export type DictationConfigRequest = { + [key: string]: unknown; +}; + +/** + * Dictation config response — map of provider name to status. + */ +export type DictationConfigResponse = { + providers: { + [key: string]: DictationProviderStatusEntry; + }; +}; + +/** + * Per-provider configuration status. + */ +export type DictationProviderStatusEntry = { + configured: boolean; + host?: string | null; + description: string; + usesProviderConfig: boolean; + settingsPath?: string | null; + configKey?: string | null; + modelConfigKey?: string | null; + defaultModel?: string | null; + selectedModel?: string | null; + availableModels?: Array; +}; + +export type DictationModelOption = { + id: string; + label: string; + description: string; +}; + +/** + * List available local Whisper models with their download status. + */ +export type DictationModelsListRequest = { + [key: string]: unknown; +}; + +export type DictationModelsListResponse = { + models: Array; +}; + +export type DictationLocalModelStatus = { + id: string; + label: string; + description: string; + sizeMb: number; + downloaded: boolean; + downloadInProgress: boolean; +}; + +/** + * Kick off a background download of a local Whisper model. + */ +export type DictationModelDownloadRequest = { + modelId: string; +}; + +/** + * Poll the progress of an in-flight download. + */ +export type DictationModelDownloadProgressRequest = { + modelId: string; +}; + +export type DictationModelDownloadProgressResponse = { + /** + * None when no download is active for this model id. + */ + progress?: DictationDownloadProgress | null; +}; + +export type DictationDownloadProgress = { + bytesDownloaded: number; + totalBytes: number; + progressPercent: number; + /** + * serde lowercase of DownloadStatus: "downloading" | "completed" | "failed" | "cancelled" + */ + status: string; + error?: string | null; +}; + +/** + * Cancel an in-flight download. + */ +export type DictationModelCancelRequest = { + modelId: string; +}; + +/** + * Delete a downloaded local Whisper model from disk. + */ +export type DictationModelDeleteRequest = { + modelId: string; +}; + +/** + * Persist the user's model selection for a given provider. 
 export type ExtRequest = {
   id: string;
   method: string;
-  params?: AddExtensionRequest | RemoveExtensionRequest | GetToolsRequest | ReadResourceRequest | UpdateWorkingDirRequest | DeleteSessionRequest | GetExtensionsRequest | GetSessionExtensionsRequest | ListProvidersRequest | GetProviderDetailsRequest | GetProviderModelsRequest | ReadConfigRequest | UpsertConfigRequest | RemoveConfigRequest | CheckSecretRequest | UpsertSecretRequest | RemoveSecretRequest | ExportSessionRequest | ImportSessionRequest | ArchiveSessionRequest | UnarchiveSessionRequest | {
+  params?: AddExtensionRequest | RemoveExtensionRequest | GetToolsRequest | ReadResourceRequest | UpdateWorkingDirRequest | DeleteSessionRequest | GetExtensionsRequest | GetSessionExtensionsRequest | ListProvidersRequest | GetProviderDetailsRequest | GetProviderModelsRequest | ReadConfigRequest | UpsertConfigRequest | RemoveConfigRequest | CheckSecretRequest | UpsertSecretRequest | RemoveSecretRequest | ExportSessionRequest | ImportSessionRequest | ArchiveSessionRequest | UnarchiveSessionRequest | DictationTranscribeRequest | DictationConfigRequest | DictationModelsListRequest | DictationModelDownloadRequest | DictationModelDownloadProgressRequest | DictationModelCancelRequest | DictationModelDeleteRequest | DictationModelSelectRequest | {
     [key: string]: unknown;
   } | null;
 };
 
 export type ExtResponse = {
   id: string;
-  result?: EmptyResponse | GetToolsResponse | ReadResourceResponse | GetExtensionsResponse | GetSessionExtensionsResponse | ListProvidersResponse | GetProviderDetailsResponse | GetProviderModelsResponse | ReadConfigResponse | CheckSecretResponse | ExportSessionResponse | ImportSessionResponse | unknown;
+  result?: EmptyResponse | GetToolsResponse | ReadResourceResponse | GetExtensionsResponse | GetSessionExtensionsResponse | ListProvidersResponse | GetProviderDetailsResponse | GetProviderModelsResponse | ReadConfigResponse | CheckSecretResponse | ExportSessionResponse | ImportSessionResponse | DictationTranscribeResponse | DictationConfigResponse | DictationModelsListResponse | DictationModelDownloadProgressResponse | unknown;
 } | {
   error: {
     code: number;
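On the wire, DictationDownloadProgress.status stays a plain string; the generated doc comment above records the serde-lowercased DownloadStatus values. A hypothetical consumer-side narrowing helper (the union alias and import path are assumptions, not part of the generated file):

```ts
// Path is illustrative.
import type { DictationDownloadProgress } from "./generated/types.gen.js";

// Values per the generated doc comment; the alias itself is an assumption
// layered on top of the wire type's plain `string`.
type DownloadStatus = "downloading" | "completed" | "failed" | "cancelled";

function isTerminal(p: DictationDownloadProgress): boolean {
  const s = p.status as DownloadStatus;
  return s === "completed" || s === "failed" || s === "cancelled";
}
```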
diff --git a/ui/sdk/src/generated/zod.gen.ts b/ui/sdk/src/generated/zod.gen.ts
index 1679d0ae1585..b48935fb2d5d 100644
--- a/ui/sdk/src/generated/zod.gen.ts
+++ b/ui/sdk/src/generated/zod.gen.ts
@@ -271,6 +271,146 @@ export const zUnarchiveSessionRequest = z.object({
   sessionId: z.string()
 });
 
+/**
+ * Transcribe audio via a dictation provider.
+ */
+export const zDictationTranscribeRequest = z.object({
+  audio: z.string(),
+  mimeType: z.string(),
+  provider: z.string()
+});
+
+/**
+ * Transcription result.
+ */
+export const zDictationTranscribeResponse = z.object({
+  text: z.string()
+});
+
+/**
+ * Get the configuration status of all dictation providers.
+ */
+export const zDictationConfigRequest = z.record(z.unknown());
+
+export const zDictationModelOption = z.object({
+  id: z.string(),
+  label: z.string(),
+  description: z.string()
+});
+
+/**
+ * Per-provider configuration status.
+ */
+export const zDictationProviderStatusEntry = z.object({
+  configured: z.boolean(),
+  host: z.union([
+    z.string(),
+    z.null()
+  ]).optional(),
+  description: z.string(),
+  usesProviderConfig: z.boolean(),
+  settingsPath: z.union([
+    z.string(),
+    z.null()
+  ]).optional(),
+  configKey: z.union([
+    z.string(),
+    z.null()
+  ]).optional(),
+  modelConfigKey: z.union([
+    z.string(),
+    z.null()
+  ]).optional(),
+  defaultModel: z.union([
+    z.string(),
+    z.null()
+  ]).optional(),
+  selectedModel: z.union([
+    z.string(),
+    z.null()
+  ]).optional(),
+  availableModels: z.array(zDictationModelOption).optional().default([])
+});
+
+/**
+ * Dictation config response — map of provider name to status.
+ */
+export const zDictationConfigResponse = z.object({
+  providers: z.record(zDictationProviderStatusEntry)
+});
+
+/**
+ * List available local Whisper models with their download status.
+ */
+export const zDictationModelsListRequest = z.record(z.unknown());
+
+export const zDictationLocalModelStatus = z.object({
+  id: z.string(),
+  label: z.string(),
+  description: z.string(),
+  sizeMb: z.number().int().gte(0),
+  downloaded: z.boolean(),
+  downloadInProgress: z.boolean()
+});
+
+export const zDictationModelsListResponse = z.object({
+  models: z.array(zDictationLocalModelStatus)
+});
+
+/**
+ * Kick off a background download of a local Whisper model.
+ */
+export const zDictationModelDownloadRequest = z.object({
+  modelId: z.string()
+});
+
+/**
+ * Poll the progress of an in-flight download.
+ */
+export const zDictationModelDownloadProgressRequest = z.object({
+  modelId: z.string()
+});
+
+export const zDictationDownloadProgress = z.object({
+  bytesDownloaded: z.number().int().gte(0),
+  totalBytes: z.number().int().gte(0),
+  progressPercent: z.number(),
+  status: z.string(),
+  error: z.union([
+    z.string(),
+    z.null()
+  ]).optional()
+});
+
+export const zDictationModelDownloadProgressResponse = z.object({
+  progress: z.union([
+    zDictationDownloadProgress,
+    z.null()
+  ]).optional()
+});
+
+/**
+ * Cancel an in-flight download.
+ */
+export const zDictationModelCancelRequest = z.object({
+  modelId: z.string()
+});
+
+/**
+ * Delete a downloaded local Whisper model from disk.
+ */
+export const zDictationModelDeleteRequest = z.object({
+  modelId: z.string()
+});
+
+/**
+ * Persist the user's model selection for a given provider.
+ */
+export const zDictationModelSelectRequest = z.object({
+  provider: z.string(),
+  modelId: z.string()
+});
+
 export const zExtRequest = z.object({
   id: z.string(),
   method: z.string(),
@@ -296,7 +436,15 @@ export const zExtRequest = z.object({
     zExportSessionRequest,
     zImportSessionRequest,
     zArchiveSessionRequest,
-    zUnarchiveSessionRequest
+    zUnarchiveSessionRequest,
+    zDictationTranscribeRequest,
+    zDictationConfigRequest,
+    zDictationModelsListRequest,
+    zDictationModelDownloadRequest,
+    zDictationModelDownloadProgressRequest,
+    zDictationModelCancelRequest,
+    zDictationModelDeleteRequest,
+    zDictationModelSelectRequest
   ]),
   z.union([
     z.record(z.unknown()),
@@ -321,7 +469,11 @@ export const zExtResponse = z.union([
     zReadConfigResponse,
     zCheckSecretResponse,
     zExportSessionResponse,
-    zImportSessionResponse
+    zImportSessionResponse,
+    zDictationTranscribeResponse,
+    zDictationConfigResponse,
+    zDictationModelsListResponse,
+    zDictationModelDownloadProgressResponse
  ]),
   z.unknown()
 ]).optional()
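One runtime consequence of the schemas above worth noting: zDictationProviderStatusEntry declares `.optional().default([])` on availableModels, so parsed entries always expose an array even when the agent omits the field. A quick illustrative check (the input literal and the import path are made up):

```ts
// Path is illustrative.
import { zDictationProviderStatusEntry } from "./generated/zod.gen.js";

// Minimal valid input: only the three required fields.
const entry = zDictationProviderStatusEntry.parse({
  configured: false,
  description: "Local Whisper",
  usesProviderConfig: false,
});
console.log(entry.availableModels); // [] — the declared .default([]) applies
console.log(entry.selectedModel); // undefined — optional nullable field omitted
```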