diff --git a/src/goose/toolkit/developer.py b/src/goose/toolkit/developer.py index 450ce244..e5cc864c 100644 --- a/src/goose/toolkit/developer.py +++ b/src/goose/toolkit/developer.py @@ -1,10 +1,15 @@ -from pathlib import Path -from subprocess import CompletedProcess, run -from typing import List, Dict import os -from goose.utils.check_shell_command import is_dangerous_command +import re +import subprocess +import time +from pathlib import Path +from typing import Dict, List from exchange import Message +from goose.toolkit.base import Toolkit, tool +from goose.toolkit.utils import get_language, render_template +from goose.utils.ask import ask_an_ai +from goose.utils.check_shell_command import is_dangerous_command from rich import box from rich.markdown import Markdown from rich.panel import Panel @@ -12,9 +17,6 @@ from rich.table import Table from rich.text import Text -from goose.toolkit.base import Toolkit, tool -from goose.toolkit.utils import get_language, render_template - def keep_unsafe_command_prompt(command: str) -> bool: command_text = Text(command, style="bold red") @@ -136,7 +138,7 @@ def read_file(self, path: str) -> str: @tool def shell(self, command: str) -> str: """ - Execute a command on the shell (in OSX) + Execute a command on the shell This will return the output and error concatenated into a single string, as you would see from running on the command line. There will also be an indication @@ -146,11 +148,7 @@ def shell(self, command: str) -> str: command (str): The shell command to run. It can support multiline statements if you need to run more than one at a time """ - self.notifier.status("planning to run shell command") # Log the command being executed in a visually structured format (Markdown). - # The `.log` method is used here to log the command execution in the application's UX - # this method is dynamically attached to functions in the Goose framework to handle user-visible - # logging and integrates with the overall UI logging system self.notifier.log(Panel.fit(Markdown(f"```bash\n{command}\n```"), title="shell")) if is_dangerous_command(command): @@ -159,16 +157,87 @@ def shell(self, command: str) -> str: if not keep_unsafe_command_prompt(command): raise RuntimeError( f"The command {command} was rejected as dangerous by the user." - + " Do not proceed further, instead ask for instructions." + " Do not proceed further, instead ask for instructions." ) self.notifier.start() self.notifier.status("running shell command") - result: CompletedProcess = run(command, shell=True, text=True, capture_output=True, check=False) - if result.returncode == 0: - output = "Command succeeded" + + # Define patterns that might indicate the process is waiting for input + interaction_patterns = [ + r"Do you want to", # Common prompt phrase + r"Enter password", # Password prompt + r"Are you sure", # Confirmation prompt + r"\(y/N\)", # Yes/No prompt + r"Press any key to continue", # Awaiting keypress + r"Waiting for input", # General waiting message + r"\?\s", # Prompts starting with '? ' + ] + compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in interaction_patterns] + + proc = subprocess.Popen( + command, + shell=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + # this enables us to read lines without blocking + os.set_blocking(proc.stdout.fileno(), False) + + # Accumulate the output logs while checking if it might be blocked + output_lines = [] + last_output_time = time.time() + cutoff = 10 + while proc.poll() is None: + self.notifier.status("running shell command") + line = proc.stdout.readline() + if line: + output_lines.append(line) + last_output_time = time.time() + + # If we see a clear pattern match, we plan to abort + exit_criteria = any(pattern.search(line) for pattern in compiled_patterns) + + # and if we haven't seen a new line in 10+s, check with AI to see if it may be stuck + if not exit_criteria and time.time() - last_output_time > cutoff: + self.notifier.status("checking on shell status") + response = ask_an_ai( + input="\n".join([command] + output_lines), + prompt=( + "You will evaluate the output of shell commands to see if they may be stuck." + " Look for commands that appear to be awaiting user input, or otherwise running indefinitely (such as a web service)." # noqa + " A command that will take a while, such as downloading resources is okay." # noqa + " return [Yes] if stuck, [No] otherwise." + ), + exchange=self.exchange_view.processor, + with_tools=False, + ) + exit_criteria = "[yes]" in response.content[0].text.lower() + # We add exponential backoff for how often we check for the command being stuck + cutoff *= 10 + + if exit_criteria: + proc.terminate() + raise ValueError( + f"The command `{command}` looks like it will run indefinitely or is otherwise stuck." + f"You may be able to specify inputs if it applies to this command." + f"Otherwise to enable continued iteration, you'll need to ask the user to run this command in another terminal." # noqa + ) + + # read any remaining lines + while line := proc.stdout.readline(): + output_lines.append(line) + output = "".join(output_lines) + + # Determine the result based on the return code + if proc.returncode == 0: + result = "Command succeeded" else: - output = f"Command failed with returncode {result.returncode}" - return "\n".join([output, result.stdout, result.stderr]) + result = f"Command failed with returncode {proc.returncode}" + + # Return the combined result and outputs if we made it this far + return "\n".join([result, output]) @tool def write_file(self, path: str, content: str) -> str: diff --git a/src/goose/utils/ask.py b/src/goose/utils/ask.py index 0e34b444..c0fee1bc 100644 --- a/src/goose/utils/ask.py +++ b/src/goose/utils/ask.py @@ -1,7 +1,13 @@ from exchange import Exchange, Message, CheckpointData -def ask_an_ai(input: str, exchange: Exchange, prompt: str = "", no_history: bool = True) -> Message: +def ask_an_ai( + input: str, + exchange: Exchange, + prompt: str = "", + no_history: bool = True, + with_tools: bool = True, +) -> Message: """Sends a separate message to an LLM using a separate Exchange than the one underlying the Goose session. Can be used to summarize a file, or submit any other request that you'd like to an AI. The Exchange can have a @@ -36,6 +42,9 @@ def ask_an_ai(input: str, exchange: Exchange, prompt: str = "", no_history: bool if no_history: exchange = clear_exchange(exchange) + if not with_tools: + exchange = exchange.replace(tools=()) + if prompt: exchange = replace_prompt(exchange, prompt)