diff --git a/crates/goose-cli/src/session/task_execution_display/mod.rs b/crates/goose-cli/src/session/task_execution_display/mod.rs index 434c777271a2..b05623778630 100644 --- a/crates/goose-cli/src/session/task_execution_display/mod.rs +++ b/crates/goose-cli/src/session/task_execution_display/mod.rs @@ -18,20 +18,29 @@ pub const TASK_EXECUTION_NOTIFICATION_TYPE: &str = "task_execution"; static INITIAL_SHOWN: AtomicBool = AtomicBool::new(false); fn format_result_data_for_display(result_data: &Value) -> String { - match result_data { + const MAX_RESULT_LENGTH: usize = 100; + + let result = match result_data { Value::String(s) => s.to_string(), Value::Object(obj) => { if let Some(partial_output) = obj.get("partial_output").and_then(|v| v.as_str()) { format!("Partial output: {}", partial_output) + } else if let Some(result) = obj.get("result").and_then(|v| v.as_str()) { + // If there's a "result" field, just show that instead of the whole object + result.to_string() } else { - serde_json::to_string_pretty(obj).unwrap_or_default() + // For other objects, show a compact representation + format!("{{...}} ({} fields)", obj.len()) } } - Value::Array(arr) => serde_json::to_string_pretty(arr).unwrap_or_default(), + Value::Array(arr) => format!("[...] 
({} items)", arr.len()), Value::Bool(b) => b.to_string(), Value::Number(n) => n.to_string(), Value::Null => "null".to_string(), - } + }; + + // Truncate long results to keep output clean + safe_truncate(&result, MAX_RESULT_LENGTH) } fn process_output_for_display(output: &str) -> String { @@ -92,10 +101,18 @@ fn format_tasks_update_from_event(event: &TaskExecutionNotificationEvent) -> Str display.push_str(MOVE_TO_PROGRESS_LINE); } - display.push_str(&format!( - "📊 Progress: {} total | ⏳ {} pending | 🏃 {} running | ✅ {} completed | ❌ {} failed", - stats.total, stats.pending, stats.running, stats.completed, stats.failed - )); + let mut progress_parts = vec![ + format!("📊 Progress: {} total", stats.total), + format!("⏳ {} pending", stats.pending), + format!("🏃 {} running", stats.running), + format!("✅ {} completed", stats.completed), + ]; + + if stats.failed > 0 { + progress_parts.push(format!("❌ {} failed", stats.failed)); + } + + display.push_str(&progress_parts.join(" | ")); display.push_str(&format!("{}\n\n", CLEAR_TO_EOL)); let mut sorted_tasks = tasks.clone(); @@ -124,7 +141,9 @@ fn format_tasks_complete_from_event(event: &TaskExecutionNotificationEvent) -> S summary.push_str(&format!("Total Tasks: {}\n", stats.total)); summary.push_str(&format!("✅ Completed: {}\n", stats.completed)); - summary.push_str(&format!("❌ Failed: {}\n", stats.failed)); + if stats.failed > 0 { + summary.push_str(&format!("❌ Failed: {}\n", stats.failed)); + } summary.push_str(&format!("📈 Success Rate: {:.1}%\n", stats.success_rate)); if !failed_tasks.is_empty() { @@ -154,18 +173,39 @@ fn format_task_display(task: &TaskInfo) -> String { TaskStatus::Failed => "❌", }; + // Show a clean, informative task header + // For text_instruction tasks, extract the instruction from metadata + // For sub_recipe tasks, show the recipe name + let task_description = if task.task_type == "text_instruction" { + // Extract the instruction text from the metadata (format: "instruction=...") + if 
task.task_metadata.starts_with("instruction=") { + task.task_metadata + .strip_prefix("instruction=") + .unwrap_or(&task.task_metadata) + .to_string() + } else { + // Fallback to task name or type + if !task.task_name.is_empty() && task.task_name != task.id { + task.task_name.clone() + } else { + task.task_type.clone() + } + } + } else { + // For sub_recipe, show the recipe name if available + if !task.task_name.is_empty() && task.task_name != task.id { + task.task_name.clone() + } else { + task.task_type.clone() + } + }; + task_display.push_str(&format!( - "{} {} ({}){}\n", - status_icon, task.task_name, task.task_type, CLEAR_TO_EOL + "{} {}{}\n", + status_icon, task_description, CLEAR_TO_EOL )); - if !task.task_metadata.is_empty() { - task_display.push_str(&format!( - " 📋 Parameters: {}{}\n", - task.task_metadata, CLEAR_TO_EOL - )); - } - + // Only show timing if available if let Some(duration_secs) = task.duration_secs { task_display.push_str(&format!(" ⏱️ {:.1}s{}\n", duration_secs, CLEAR_TO_EOL)); } diff --git a/crates/goose-cli/src/session/task_execution_display/tests.rs b/crates/goose-cli/src/session/task_execution_display/tests.rs index 7968af7da724..4d79291029d6 100644 --- a/crates/goose-cli/src/session/task_execution_display/tests.rs +++ b/crates/goose-cli/src/session/task_execution_display/tests.rs @@ -48,14 +48,11 @@ fn test_format_result_data_for_display() { let obj = json!({"key": "value", "num": 42}); let result = format_result_data_for_display(&obj); - assert!(result.contains("key")); - assert!(result.contains("value")); + assert_eq!(result, "{...} (2 fields)"); let arr = json!([1, 2, 3]); let result = format_result_data_for_display(&arr); - assert!(result.contains("1")); - assert!(result.contains("2")); - assert!(result.contains("3")); + assert_eq!(result, "[...] 
(3 items)"); } #[test] @@ -136,10 +133,10 @@ fn test_format_tasks_update_from_event() { assert!(result.contains("⏳ 1 pending")); assert!(result.contains("🏃 1 running")); assert!(result.contains("✅ 1 completed")); - assert!(result.contains("❌ 0 failed")); + assert!(!result.contains("❌ 0 failed")); + assert!(!result.contains("❌ failed")); assert!(result.contains("🏃 test-task")); assert!(result.contains("✅ another-task")); - assert!(result.contains("📋 Parameters: param=value")); assert!(result.contains("⏱️ 1.5s")); assert!(result.contains("💬 Processing...")); @@ -188,7 +185,7 @@ fn test_format_tasks_complete_from_event_no_failures() { assert!(!result.contains("❌ Failed Tasks:")); assert!(result.contains("📈 Success Rate: 100.0%")); - assert!(result.contains("❌ Failed: 0")); + assert!(!result.contains("❌ Failed")); } #[test] @@ -207,8 +204,7 @@ fn test_format_task_display_running() { let result = format_task_display(&task); - assert!(result.contains("🏃 data-processor (sub_recipe)")); - assert!(result.contains("📋 Parameters: input=file.txt,output=result.json")); + assert!(result.contains("🏃 data-processor")); assert!(result.contains("⏱️ 1.5s")); assert!(result.contains("💬 Processing data... ... 
Almost done...")); } @@ -229,7 +225,7 @@ fn test_format_task_display_completed() { let result = format_task_display(&task); - assert!(result.contains("✅ analyzer (text_instruction)")); + assert!(result.contains("✅ analyzer")); assert!(result.contains("⏱️ 3.2s")); assert!(!result.contains("📋 Parameters")); assert!(result.contains("📄")); @@ -254,7 +250,7 @@ fn test_format_task_display_failed() { let result = format_task_display(&task); - assert!(result.contains("❌ failing-task (sub_recipe)")); + assert!(result.contains("❌ failing-task")); assert!(!result.contains("⏱️")); assert!(result.contains("⚠️")); assert!(result.contains("Network connection failed after multiple retries")); @@ -276,8 +272,8 @@ fn test_format_task_display_pending() { let result = format_task_display(&task); - assert!(result.contains("⏳ waiting-task (sub_recipe)")); - assert!(result.contains("📋 Parameters: priority=high")); + assert!(result.contains("⏳ waiting-task")); + assert!(!result.contains("📋 Parameters")); assert!(!result.contains("⏱️")); assert!(!result.contains("💬")); assert!(!result.contains("📄")); diff --git a/crates/goose/src/agents/prompt_manager.rs b/crates/goose/src/agents/prompt_manager.rs index 87c3bbb93809..1dbc23bb94ec 100644 --- a/crates/goose/src/agents/prompt_manager.rs +++ b/crates/goose/src/agents/prompt_manager.rs @@ -39,25 +39,6 @@ impl PromptManager { self.system_prompt_override = Some(template); } - /// Normalize a model name (replace - and / with _, lower case) - fn normalize_model_name(name: &str) -> String { - name.replace(['-', '/', '.'], "_").to_lowercase() - } - - /// Map model (normalized) to prompt filenames; returns filename if a key is contained in the normalized model - fn model_prompt_map(model: &str) -> &'static str { - let mut map = HashMap::new(); - map.insert("gpt_4_1", "system_gpt_4.1.md"); - // Add more mappings as needed - let norm_model = Self::normalize_model_name(model); - for (key, val) in &map { - if norm_model.contains(key) { - return val; - } - 
} - "system.md" - } - /// Build the final system prompt /// /// * `extensions_info` – extension information for each extension/MCP @@ -122,9 +103,8 @@ impl PromptManager { let sanitized_override_prompt = sanitize_unicode_tags(override_prompt); prompt_template::render_inline_once(&sanitized_override_prompt, &context) .expect("Prompt should render") - } else if let Some(model) = &model_to_use { - // Use the fuzzy mapping to determine the prompt file, or fall back to legacy logic - let prompt_file = Self::model_prompt_map(model); + } else if let Some(_model) = &model_to_use { + let prompt_file = "system.md"; match prompt_template::render_global_file(prompt_file, &context) { Ok(prompt) => prompt, Err(_) => { @@ -177,58 +157,6 @@ impl PromptManager { mod tests { use super::*; - #[test] - fn test_normalize_model_name() { - assert_eq!(PromptManager::normalize_model_name("gpt-4.1"), "gpt_4_1"); - assert_eq!(PromptManager::normalize_model_name("gpt/3.5"), "gpt_3_5"); - assert_eq!( - PromptManager::normalize_model_name("GPT-3.5/PLUS"), - "gpt_3_5_plus" - ); - } - - #[test] - fn test_model_prompt_map_matches() { - // should match prompts based on contained normalized keys - assert_eq!( - PromptManager::model_prompt_map("gpt-4.1"), - "system_gpt_4.1.md" - ); - - assert_eq!( - PromptManager::model_prompt_map("gpt-4.1-2025-04-14"), - "system_gpt_4.1.md" - ); - - assert_eq!( - PromptManager::model_prompt_map("openai/gpt-4.1"), - "system_gpt_4.1.md" - ); - assert_eq!( - PromptManager::model_prompt_map("goose-gpt-4-1"), - "system_gpt_4.1.md" - ); - assert_eq!( - PromptManager::model_prompt_map("gpt-4-1-huge"), - "system_gpt_4.1.md" - ); - } - - #[test] - fn test_model_prompt_map_none() { - // should return system.md for unrecognized/unsupported model names - assert_eq!(PromptManager::model_prompt_map("llama-3-70b"), "system.md"); - assert_eq!(PromptManager::model_prompt_map("goose"), "system.md"); - assert_eq!( - PromptManager::model_prompt_map("claude-3.7-sonnet"), - "system.md" - 
); - assert_eq!( - PromptManager::model_prompt_map("xxx-unknown-model"), - "system.md" - ); - } - #[test] fn test_build_system_prompt_sanitizes_override() { let mut manager = PromptManager::new(); diff --git a/crates/goose/src/prompts/system.md b/crates/goose/src/prompts/system.md index 1bcf22993c72..b4647352784a 100644 --- a/crates/goose/src/prompts/system.md +++ b/crates/goose/src/prompts/system.md @@ -1,9 +1,8 @@ -You are a general-purpose AI agent called Goose, created by Block, the parent company of Square, CashApp, and Tidal. Goose is being developed as an open-source software project. +You are a general-purpose AI agent called Goose. The current date is {{current_date_time}}. -Goose uses LLM providers with tool calling capability. You can be used with different language models (gpt-4o, claude-3.5-sonnet, o1, llama-3.2, deepseek-r1, etc). -These models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date. +Goose uses LLM providers with tool calling capability. You can be used with different language models. # Extensions @@ -41,6 +40,8 @@ No extensions are defined. You should let the user know that they should add ext # Task Management - Required — use `todo__read` and `todo__write` for any task with 2+ steps, multiple files/components, or uncertain scope. Skipping them is an error. +- Required: when planning a task, consider how it can be run via sub-agents using `dynamic_task__create_task`. Some tasks are best delegated to a sub-agent when you only need the result. Tasks that can run in parallel are also good candidates for sub-agents. If the user asks for multiple things at once, default to using sub-agents. + - Start — `todo__read`, then `todo__write` a brief checklist (Markdown checkboxes). - During — after each major action, update via `todo__write`: mark done, add/edit items, note blockers/dependencies. - Finish — ensure every item is checked, or clearly list what remains. 
@@ -52,6 +53,8 @@ Template: - [ ] Implement feature X - [ ] Update API - [ ] Write tests + - [ ] Run tests (sub-agent in parallel) + - [ ] Run lint (sub-agent in parallel) - [ ] Blocked: waiting on credentials ``` diff --git a/crates/goose/src/prompts/system_gpt_4.1.md b/crates/goose/src/prompts/system_gpt_4.1.md deleted file mode 100644 index ad33223cbe12..000000000000 --- a/crates/goose/src/prompts/system_gpt_4.1.md +++ /dev/null @@ -1,61 +0,0 @@ -You are a general-purpose AI agent called Goose, created by Block, the parent company of Square, CashApp, and Tidal. Goose is being developed as an open-source software project. - -IMPORTANT INSTRUCTIONS: - -Please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. - -If you are not sure about file content or codebase structure, or other information pertaining to the user's request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer. It is important you use tools that can assist with providing the right context. - -CRITICAL: The str_replace command in the text_editor tool (when available) should be used most of the time, with the write tool only for new files. ALWAYS check the content of the file before editing. NEVER overwrite the whole content of a file unless directed to, always edit carefully by adding and changing content. Never leave content unfinished with comments like "rest of the file here" - -The user may direct or imply that you are to take actions, in this case, it is important to note the following guidelines: - -* If you are directed to complete a task, you should see it through. -* Your thinking should be thorough and so it's fine if it's very long. You can think step by step before and after each action you decide to take. -* Only terminate your turn when you are sure that the problem is solved. 
Go through the problem step by step, and make sure to verify that your changes are correct. NEVER end your turn without having solved the problem, and when you say you are going to make a tool call, make sure you ACTUALLY make the tool call, instead of ending your turn. -* You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully. -* Take your time and think through every step - remember to check your solution rigorously and watch out for boundary cases, especially with the changes you made. Your solution must be perfect. If not, continue working on it. When you are validating solutions with tools, it is important to iterate until you get success -* Do not stop and ask the user for confirmation for actions you should be taking to achieve the outcomes directed and with tools available. - - - -The current date is {{current_date_time}}. - -Goose uses LLM providers with tool calling capability. -Your model may have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date. - -# Extensions - -Extensions allow other applications to provide context to Goose. Extensions connect Goose to different data sources and tools. -You are capable of dynamically plugging into new extensions and learning how to use them. You solve higher level problems using the tools in these extensions, and can interact with multiple at once. -Use the search_available_extensions tool to find additional extensions to enable to help with your task. To enable extensions, use the enable_extension tool and provide the extension_name. You should only enable extensions found from the search_available_extensions tool. 
- -{% if (extensions is defined) and extensions %} -Because you dynamically load extensions, your conversation history may refer -to interactions with extensions that are not currently active. The currently -active extensions are below. Each of these extensions provides tools that are -in your tool specification. - -{% for extension in extensions %} -## {{extension.name}} -{% if extension.has_resources %} -{{extension.name}} supports resources, you can use platform__read_resource, -and platform__list_resources on this extension. -{% endif %} -{% if extension.instructions %}### Instructions -{{extension.instructions}}{% endif %} -{% endfor %} - -{% else %} -No extensions are defined. You should let the user know that they should add extensions. -{% endif %} - -# Response Guidelines - -- Use Markdown formatting for all responses. -- Follow best practices for Markdown, including: - - Using headers for organization. - - Bullet points for lists. - - Links formatted correctly, either as linked text (e.g., [this is linked text](https://example.com)) or automatic links using angle brackets (e.g., ). -- For code examples, use fenced code blocks by placing triple backticks (` ``` `) before and after the code. Include the language identifier after the opening backticks (e.g., ` ```python `) to enable syntax highlighting. -- Ensure clarity, conciseness, and proper formatting to enhance readability and usability.