diff --git a/crates/goose-cli/src/commands/mcp.rs b/crates/goose-cli/src/commands/mcp.rs
index 0db83f3efc0d..325a3fa4d347 100644
--- a/crates/goose-cli/src/commands/mcp.rs
+++ b/crates/goose-cli/src/commands/mcp.rs
@@ -32,6 +32,7 @@ pub async fn run_server(name: &str) -> Result<()> {
         "computercontroller" => Some(Box::new(RouterService(ComputerControllerRouter::new()))),
         "autovisualiser" => Some(Box::new(RouterService(AutoVisualiserRouter::new()))),
         "memory" => Some(Box::new(RouterService(MemoryRouter::new()))),
+
         "tutorial" => Some(Box::new(RouterService(TutorialRouter::new()))),
         _ => None,
     };
diff --git a/crates/goose-mcp/Cargo.toml b/crates/goose-mcp/Cargo.toml
index feb195ded449..5c21287ac043 100644
--- a/crates/goose-mcp/Cargo.toml
+++ b/crates/goose-mcp/Cargo.toml
@@ -11,6 +11,7 @@ description.workspace = true
 workspace = true
 
 [dependencies]
+
 mcp-core = { path = "../mcp-core" }
 mcp-server = { path = "../mcp-server" }
 rmcp = { workspace = true }
diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs
index 0503641f24e4..53e04fefbcc0 100644
--- a/crates/goose/src/agents/agent.rs
+++ b/crates/goose/src/agents/agent.rs
@@ -41,7 +41,6 @@ use crate::providers::errors::ProviderError;
 use crate::recipe::{Author, Recipe, Response, Settings, SubRecipe};
 use crate::scheduler_trait::SchedulerTrait;
 use crate::session;
-use crate::session::extension_data::ExtensionState;
 use crate::tool_monitor::{ToolCall, ToolMonitor};
 use crate::utils::is_token_cancelled;
 use mcp_core::ToolResult;
@@ -58,9 +57,7 @@ use super::final_output_tool::FinalOutputTool;
 use super::platform_tools;
 use super::tool_execution::{ToolCallResult, CHAT_MODE_TOOL_SKIPPED_RESPONSE, DECLINED_RESPONSE};
 use crate::agents::subagent_task_config::TaskConfig;
-use crate::agents::todo_tools::{
-    todo_read_tool, todo_write_tool, TODO_READ_TOOL_NAME, TODO_WRITE_TOOL_NAME,
-};
+
 use crate::conversation::message::{Message, ToolRequest};
 
 const DEFAULT_MAX_TURNS: u32 = 1000;
@@ -226,6 +223,9 @@ impl Agent {
         unfixed_conversation: Conversation,
         session: &Option<SessionConfig>,
     ) -> Result<ReplyContext> {
+        // Ensure TODO extension is registered if we have a session
+        self.ensure_todo_extension(session).await;
+
         let unfixed_messages = unfixed_conversation.messages().clone();
         let (conversation, issues) = fix_conversation(unfixed_conversation.clone());
         if !issues.is_empty() {
@@ -484,100 +484,6 @@ impl Agent {
                 "Frontend tool execution required".to_string(),
                 None,
             )))
-        } else if tool_call.name == TODO_READ_TOOL_NAME {
-            // Handle task planner read tool
-            let session_file_path = if let Some(session_config) = session {
-                session::storage::get_path(session_config.id.clone()).ok()
-            } else {
-                None
-            };
-
-            let todo_content = if let Some(path) = session_file_path {
-                session::storage::read_metadata(&path)
-                    .ok()
-                    .and_then(|m| {
-                        session::TodoState::from_extension_data(&m.extension_data)
-                            .map(|state| state.content)
-                    })
-                    .unwrap_or_default()
-            } else {
-                String::new()
-            };
-
-            ToolCallResult::from(Ok(vec![Content::text(todo_content)]))
-        } else if tool_call.name == TODO_WRITE_TOOL_NAME {
-            // Handle task planner write tool
-            let content = tool_call
-                .arguments
-                .get("content")
-                .and_then(|v| v.as_str())
-                .unwrap_or("")
-                .to_string();
-
-            // Character limit validation
-            let char_count = content.chars().count();
-            let max_chars = std::env::var("GOOSE_TODO_MAX_CHARS")
-                .ok()
-                .and_then(|s| s.parse().ok())
-                .unwrap_or(50_000);
-
-            if max_chars > 0 && char_count > max_chars {
-                ToolCallResult::from(Err(ErrorData::new(
-                    ErrorCode::INTERNAL_ERROR,
-                    format!(
-                        "Todo list too large: {} chars (max: {})",
-                        char_count, max_chars
-                    ),
-                    None,
-                )))
-            } else if let Some(session_config) = session {
-                // Update session metadata with new TODO content
-                match session::storage::get_path(session_config.id.clone()) {
-                    Ok(path) => match session::storage::read_metadata(&path) {
-                        Ok(mut metadata) => {
-                            let todo_state = session::TodoState::new(content);
-                            todo_state
-                                .to_extension_data(&mut metadata.extension_data)
-                                .ok();
-
-                            let path_clone = path.clone();
-                            let metadata_clone = metadata.clone();
-                            let update_result = tokio::task::spawn(async move {
-                                session::storage::update_metadata(&path_clone, &metadata_clone)
-                                    .await
-                            })
-                            .await;
-
-                            match update_result {
-                                Ok(Ok(_)) => ToolCallResult::from(Ok(vec![Content::text(
-                                    format!("Updated ({} chars)", char_count),
-                                )])),
-                                _ => ToolCallResult::from(Err(ErrorData::new(
-                                    ErrorCode::INTERNAL_ERROR,
-                                    "Failed to update session metadata".to_string(),
-                                    None,
-                                ))),
-                            }
-                        }
-                        Err(_) => ToolCallResult::from(Err(ErrorData::new(
-                            ErrorCode::INTERNAL_ERROR,
-                            "Failed to read session metadata".to_string(),
-                            None,
-                        ))),
-                    },
-                    Err(_) => ToolCallResult::from(Err(ErrorData::new(
-                        ErrorCode::INTERNAL_ERROR,
-                        "Failed to get session path".to_string(),
-                        None,
-                    ))),
-                }
-            } else {
-                ToolCallResult::from(Err(ErrorData::new(
-                    ErrorCode::INTERNAL_ERROR,
-                    "TODO tools require an active session to persist data".to_string(),
-                    None,
-                )))
-            }
         } else if tool_call.name == ROUTER_LLM_SEARCH_TOOL_NAME {
             match self
                 .tool_route_manager
@@ -803,9 +709,6 @@ impl Agent {
                 platform_tools::manage_schedule_tool(),
             ]);
 
-            // Add task planner tools
-            prefixed_tools.extend([todo_read_tool(), todo_write_tool()]);
-
             // Dynamic task tool
             prefixed_tools.push(create_dynamic_task_tool());
 
@@ -1613,11 +1516,24 @@ mod tests {
     async fn test_todo_tools_integration() -> Result<()> {
         let agent = Agent::new();
 
-        // Test that task planner tools are listed
+        // Create a mock session to trigger TODO extension registration
+        let session = SessionConfig {
+            id: session::Identifier::Name("test-session".to_string()),
+            working_dir: std::path::PathBuf::from("/tmp"),
+            schedule_id: None,
+            execution_mode: None,
+            max_turns: None,
+            retry_config: None,
+        };
+
+        // Ensure TODO extension is registered
+        agent.ensure_todo_extension(&Some(session)).await;
+
+        // Test that task planner tools are listed through the extension
         let tools = agent.list_tools(None).await;
 
-        let todo_read = tools.iter().find(|tool| tool.name == TODO_READ_TOOL_NAME);
-        let todo_write = tools.iter().find(|tool| tool.name == TODO_WRITE_TOOL_NAME);
+        let todo_read = tools.iter().find(|tool| tool.name == "todo__read");
+        let todo_write = tools.iter().find(|tool| tool.name == "todo__write");
 
         assert!(todo_read.is_some(), "TODO read tool should be present");
         assert!(todo_write.is_some(), "TODO write tool should be present");
diff --git a/crates/goose/src/agents/mod.rs b/crates/goose/src/agents/mod.rs
index ae668b714ea1..4f6879e00315 100644
--- a/crates/goose/src/agents/mod.rs
+++ b/crates/goose/src/agents/mod.rs
@@ -18,6 +18,7 @@ pub mod subagent;
 pub mod subagent_execution_tool;
 pub mod subagent_handler;
 mod subagent_task_config;
+pub mod todo_mcp_client;
 pub mod todo_tools;
 mod tool_execution;
 mod tool_route_manager;
diff --git a/crates/goose/src/agents/reply_parts.rs b/crates/goose/src/agents/reply_parts.rs
index 14cbc7888e50..38cf74346f54 100644
--- a/crates/goose/src/agents/reply_parts.rs
+++ b/crates/goose/src/agents/reply_parts.rs
@@ -1,6 +1,7 @@
 use anyhow::Result;
 use std::collections::HashSet;
 use std::sync::Arc;
+use tokio::sync::Mutex;
 
 use async_stream::try_stream;
 use futures::stream::StreamExt;
@@ -32,6 +33,50 @@ async fn toolshim_postprocess(
 }
 
 impl Agent {
+    /// Register TODO extension if session is available
+    pub async fn ensure_todo_extension(
+        &self,
+        session: &Option<crate::agents::types::SessionConfig>,
+    ) {
+        // Check if TODO extension is already registered
+        if self
+            .extension_manager
+            .list_extensions()
+            .await
+            .unwrap_or_default()
+            .contains(&"todo".to_string())
+        {
+            return;
+        }
+
+        // Register TODO extension if we have a session
+        if let Some(session_config) = session {
+            let todo_client = super::todo_mcp_client::TodoMcpClient::with_session(session_config);
+            let server_info = {
+                use mcp_client::client::McpClientTrait;
+                todo_client.get_info().cloned()
+            };
+            let config = crate::agents::extension::ExtensionConfig::Builtin {
+                name: "todo".to_string(),
+                display_name: Some("Task Management".to_string()),
+                description: Some("Persistent task tracking throughout your session".to_string()),
+                timeout: None,
+                bundled: Some(true),
+                available_tools: vec!["read".to_string(), "write".to_string()],
+            };
+
+            self.extension_manager
+                .add_client(
+                    "todo".to_string(),
+                    config,
+                    Arc::new(Mutex::new(Box::new(todo_client))),
+                    server_info,
+                    None,
+                )
+                .await;
+        }
+    }
+
     /// Prepares tools and system prompt for a provider request
     pub async fn prepare_tools_and_prompt(&self) -> anyhow::Result<(Vec<Tool>, Vec<Tool>, String)> {
         // Get router enabled status
diff --git a/crates/goose/src/agents/todo_mcp_client.rs b/crates/goose/src/agents/todo_mcp_client.rs
new file mode 100644
index 000000000000..d7b674bd54e3
--- /dev/null
+++ b/crates/goose/src/agents/todo_mcp_client.rs
@@ -0,0 +1,417 @@
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use mcp_client::client::{Error, McpClientTrait};
+use rmcp::model::{
+    CallToolResult, Content, GetPromptResult, InitializeResult, ListPromptsResult,
+    ListResourcesResult, ListToolsResult, ReadResourceResult, ServerNotification, Tool,
+};
+use serde_json::Value;
+use tokio::sync::{mpsc, Mutex};
+use tokio_util::sync::CancellationToken;
+
+use crate::agents::todo_tools::{todo_read_tool, todo_write_tool};
+use crate::agents::types::SessionConfig;
+use crate::session::extension_data::ExtensionState;
+use crate::session::{self, TodoState};
+
+/// Storage trait for TODO persistence
+#[async_trait]
+pub trait TodoStorage: Send + Sync {
+    async fn read(&self) -> Result<String, String>;
+    async fn write(&self, content: String) -> Result<String, String>;
+}
+
+/// Session-based TODO storage implementation
+pub struct SessionTodoStorage {
+    session_id: session::Identifier,
+}
+
+impl SessionTodoStorage {
+    pub fn new(session_id: session::Identifier) -> Self {
+        Self { session_id }
+    }
+}
+
+#[async_trait]
+impl TodoStorage for SessionTodoStorage {
+    async fn read(&self) -> Result<String, String> {
+        let session_path = session::storage::get_path(self.session_id.clone())
+            .map_err(|e| format!("Failed to get session path: {}", e))?;
+
+        let metadata = session::storage::read_metadata(&session_path)
+            .map_err(|e| format!("Failed to read session metadata: {}", e))?;
+
+        let content = TodoState::from_extension_data(&metadata.extension_data)
+            .map(|state| state.content)
+            .unwrap_or_default();
+
+        Ok(content)
+    }
+
+    async fn write(&self, content: String) -> Result<String, String> {
+        // Character limit validation
+        let char_count = content.chars().count();
+        let max_chars = std::env::var("GOOSE_TODO_MAX_CHARS")
+            .ok()
+            .and_then(|s| s.parse().ok())
+            .unwrap_or(50_000);
+
+        if max_chars > 0 && char_count > max_chars {
+            return Err(format!(
+                "Todo list too large: {} chars (max: {})",
+                char_count, max_chars
+            ));
+        }
+
+        let session_path = session::storage::get_path(self.session_id.clone())
+            .map_err(|e| format!("Failed to get session path: {}", e))?;
+
+        let mut metadata = session::storage::read_metadata(&session_path)
+            .map_err(|e| format!("Failed to read session metadata: {}", e))?;
+
+        let todo_state = TodoState::new(content);
+        todo_state
+            .to_extension_data(&mut metadata.extension_data)
+            .map_err(|e| format!("Failed to update extension data: {}", e))?;
+
+        session::storage::update_metadata(&session_path, &metadata)
+            .await
+            .map_err(|e| format!("Failed to update session metadata: {}", e))?;
+
+        Ok(format!("Updated ({} chars)", char_count))
+    }
+}
+
+/// In-memory TODO storage for sessions without persistence
+pub struct MemoryTodoStorage {
+    content: Arc<Mutex<String>>,
+}
+
+impl MemoryTodoStorage {
+    pub fn new() -> Self {
+        Self {
+            content: Arc::new(Mutex::new(String::new())),
+        }
+    }
+}
+
+impl Default for MemoryTodoStorage {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[async_trait]
+impl TodoStorage for MemoryTodoStorage {
+    async fn read(&self) -> Result<String, String> {
+        Ok(self.content.lock().await.clone())
+    }
+
+    async fn write(&self, content: String) -> Result<String, String> {
+        let char_count = content.chars().count();
+        *self.content.lock().await = content;
+        Ok(format!("Updated ({} chars)", char_count))
+    }
+}
+
+/// Internal MCP client for TODO functionality
+pub struct TodoMcpClient {
+    storage: Arc<dyn TodoStorage>,
+    server_info: InitializeResult,
+}
+
+impl TodoMcpClient {
+    pub fn new(storage: Arc<dyn TodoStorage>) -> Self {
+        let instructions = r#"The todo extension provides persistent task management throughout your session.
+
+These tools help you track multi-step work, maintain context between interactions, and ensure systematic task completion.
+
+## Task Management Guidelines
+
+**Required Usage:**
+- Use `todo__read` and `todo__write` for any task with 2+ steps, multiple files/components, or uncertain scope
+- Skipping these tools when needed is considered an error
+
+**Workflow:**
+1. Start: Always `todo__read` first, then `todo__write` a brief checklist using Markdown checkboxes
+2. During: After each major action, reread via `todo__read`, then update via `todo__write` - mark completed items, add new discoveries, note blockers
+3. Finish: Ensure every item is checked, or clearly list what remains
+
+**Critical:** `todo__write` replaces the entire list. Always read before writing - not doing so is an error.
+
+**Best Practices:**
+- Keep items short, specific, and action-oriented
+- Use nested checkboxes for subtasks
+- Include context about blockers or dependencies
+
+Example format:
+```markdown
+- [x] Analyze request fully
+- [ ] Create implementation plan
+  - [x] General guidelines
+  - [ ] Sample work
+- [ ] Begin on implementation plan
+```"#;
+
+        let server_info = InitializeResult {
+            protocol_version: rmcp::model::ProtocolVersion::V_2025_03_26,
+            capabilities: rmcp::model::ServerCapabilities {
+                tools: Some(rmcp::model::ToolsCapability { list_changed: None }),
+                prompts: None,
+                resources: None,
+                logging: None,
+                experimental: None,
+                completions: None,
+            },
+            server_info: rmcp::model::Implementation {
+                name: "todo".to_string(),
+                version: "1.0.0".to_string(),
+            },
+            instructions: Some(instructions.to_string()),
+        };
+
+        Self {
+            storage,
+            server_info,
+        }
+    }
+
+    pub fn with_session(session: &SessionConfig) -> Self {
+        // Use the session ID directly - the storage layer will handle it properly
+        let storage = Arc::new(SessionTodoStorage::new(session.id.clone()));
+        Self::new(storage)
+    }
+
+    pub fn memory_only() -> Self {
+        let storage = Arc::new(MemoryTodoStorage::new());
+        Self::new(storage)
+    }
+}
+
+#[async_trait]
+impl McpClientTrait for TodoMcpClient {
+    fn get_info(&self) -> Option<&InitializeResult> {
+        Some(&self.server_info)
+    }
+
+    async fn list_resources(
+        &self,
+        _next_cursor: Option<String>,
+        _cancellation_token: CancellationToken,
+    ) -> Result<ListResourcesResult, Error> {
+        Ok(ListResourcesResult {
+            resources: vec![],
+            next_cursor: None,
+        })
+    }
+
+    async fn read_resource(
+        &self,
+        _uri: &str,
+        _cancellation_token: CancellationToken,
+    ) -> Result<ReadResourceResult, Error> {
+        Err(Error::UnexpectedResponse)
+    }
+
+    async fn list_tools(
+        &self,
+        _next_cursor: Option<String>,
+        _cancellation_token: CancellationToken,
+    ) -> Result<ListToolsResult, Error> {
+        // Return unprefixed tools - the extension manager will add the prefix
+        Ok(ListToolsResult {
+            tools: vec![
+                Tool {
+                    name: "read".into(),
+                    description: todo_read_tool().description,
+                    input_schema: todo_read_tool().input_schema,
+                    annotations: todo_read_tool().annotations,
+                    output_schema: todo_read_tool().output_schema,
+                },
+                Tool {
+                    name: "write".into(),
+                    description: todo_write_tool().description,
+                    input_schema: todo_write_tool().input_schema,
+                    annotations: todo_write_tool().annotations,
+                    output_schema: todo_write_tool().output_schema,
+                },
+            ],
+            next_cursor: None,
+        })
+    }
+
+    async fn call_tool(
+        &self,
+        name: &str,
+        arguments: Value,
+        _cancellation_token: CancellationToken,
+    ) -> Result<CallToolResult, Error> {
+        match name {
+            "read" => {
+                let content = self.storage.read().await.map_err(|e| {
+                    Error::McpError(rmcp::model::ErrorData::new(
+                        rmcp::model::ErrorCode::INTERNAL_ERROR,
+                        e,
+                        None,
+                    ))
+                })?;
+                Ok(CallToolResult {
+                    content: vec![Content::text(content)],
+                    is_error: None,
+                    structured_content: None,
+                })
+            }
+            "write" => {
+                let content = arguments
+                    .get("content")
+                    .and_then(|v| v.as_str())
+                    .ok_or_else(|| {
+                        Error::McpError(rmcp::model::ErrorData::new(
+                            rmcp::model::ErrorCode::INVALID_PARAMS,
+                            "Missing 'content' parameter".to_string(),
+                            None,
+                        ))
+                    })?
+                    .to_string();
+
+                let result = self.storage.write(content).await.map_err(|e| {
+                    Error::McpError(rmcp::model::ErrorData::new(
+                        rmcp::model::ErrorCode::INTERNAL_ERROR,
+                        e,
+                        None,
+                    ))
+                })?;
+
+                Ok(CallToolResult {
+                    content: vec![Content::text(result)],
+                    is_error: None,
+                    structured_content: None,
+                })
+            }
+            _ => Err(Error::UnexpectedResponse),
+        }
+    }
+
+    async fn list_prompts(
+        &self,
+        _next_cursor: Option<String>,
+        _cancellation_token: CancellationToken,
+    ) -> Result<ListPromptsResult, Error> {
+        Ok(ListPromptsResult {
+            prompts: vec![],
+            next_cursor: None,
+        })
+    }
+
+    async fn get_prompt(
+        &self,
+        _name: &str,
+        _arguments: Value,
+        _cancellation_token: CancellationToken,
+    ) -> Result<GetPromptResult, Error> {
+        Err(Error::UnexpectedResponse)
+    }
+
+    async fn subscribe(&self) -> mpsc::Receiver<ServerNotification> {
+        // Return a receiver that will never send anything
+        let (_tx, rx) = mpsc::channel(1);
+        rx
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn test_memory_storage() {
+        let storage = MemoryTodoStorage::new();
+
+        // Test initial read
+        let content = storage.read().await.unwrap();
+        assert_eq!(content, "");
+
+        // Test write
+        let result = storage.write("- [ ] Test task".to_string()).await.unwrap();
+        assert!(result.contains("Updated"));
+
+        // Test read after write
+        let content = storage.read().await.unwrap();
+        assert_eq!(content, "- [ ] Test task");
+    }
+
+    #[tokio::test]
+    async fn test_todo_client_list_tools() {
+        let client = TodoMcpClient::memory_only();
+
+        let tools = client
+            .list_tools(None, CancellationToken::default())
+            .await
+            .unwrap();
+
+        assert_eq!(tools.tools.len(), 2);
+        assert_eq!(tools.tools[0].name, "read");
+        assert_eq!(tools.tools[1].name, "write");
+    }
+
+    #[tokio::test]
+    async fn test_todo_client_read_write() {
+        let client = TodoMcpClient::memory_only();
+
+        // Test read empty
+        let result = client
+            .call_tool("read", serde_json::json!({}), CancellationToken::default())
+            .await
+            .unwrap();
+        assert_eq!(result.content[0], Content::text(""));
+
+        // Test write
+        let result = client
+            .call_tool(
+                "write",
+                serde_json::json!({"content": "- [ ] Task 1"}),
+                CancellationToken::default(),
+            )
+            .await
+            .unwrap();
+        assert!(result.content[0]
+            .as_text()
+            .unwrap()
+            .text
+            .contains("Updated"));
+
+        // Test read after write
+        let result = client
+            .call_tool("read", serde_json::json!({}), CancellationToken::default())
+            .await
+            .unwrap();
+        assert_eq!(result.content[0], Content::text("- [ ] Task 1"));
+    }
+
+    #[tokio::test]
+    async fn test_todo_client_invalid_tool() {
+        let client = TodoMcpClient::memory_only();
+
+        let result = client
+            .call_tool(
+                "invalid",
+                serde_json::json!({}),
+                CancellationToken::default(),
+            )
+            .await;
+
+        assert!(matches!(result, Err(Error::UnexpectedResponse)));
+    }
+
+    #[tokio::test]
+    async fn test_todo_client_missing_content_param() {
+        let client = TodoMcpClient::memory_only();
+
+        let result = client
+            .call_tool("write", serde_json::json!({}), CancellationToken::default())
+            .await;
+
+        assert!(matches!(result, Err(Error::McpError(_))));
+    }
+}
diff --git a/crates/goose/src/prompts/system.md b/crates/goose/src/prompts/system.md
index 1bcf22993c72..2681696a0dbc 100644
--- a/crates/goose/src/prompts/system.md
+++ b/crates/goose/src/prompts/system.md
@@ -38,23 +38,6 @@ No extensions are defined. You should let the user know that they should add ext
 
 {{tool_selection_strategy}}
 
-# Task Management
-
-- Required — use `todo__read` and `todo__write` for any task with 2+ steps, multiple files/components, or uncertain scope. Skipping them is an error.
-- Start — `todo__read`, then `todo__write` a brief checklist (Markdown checkboxes).
-- During — after each major action, update via `todo__write`: mark done, add/edit items, note blockers/dependencies.
-- Finish — ensure every item is checked, or clearly list what remains.
-- Overwrite warning — `todo__write` replaces the entire list; always read before writing. It is an error to not read before writing.
-- Quality — keep items short, specific, and action‑oriented.
-
-Template:
-```markdown
-- [ ] Implement feature X
-  - [ ] Update API
-  - [ ] Write tests
-- [ ] Blocked: waiting on credentials
-```
-
 # Response Guidelines
 
 - Use Markdown formatting for all responses.