# Patch overview (leading text ahead of the first "diff --git" header):
#   - .github/workflows/pr-smoke-test.yml: adds a "Run Subrecipe Tests" step
#     that runs scripts/test_subrecipes.sh with SKIP_BUILD set.
#   - scripts/test-subrecipes-examples/subrecipes/activity-suggestions.yaml:
#     new recipe with one required string parameter, weather_conditions, which
#     is substituted into its prompt.
diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml
index aa89b8a7919d..333d9d8ab6f6 100644
--- a/.github/workflows/pr-smoke-test.yml
+++ b/.github/workflows/pr-smoke-test.yml
@@ -115,3 +115,14 @@ jobs:
 
           # Run the provider test script (binary already built and downloaded)
           bash scripts/test_providers.sh
+
+      - name: Run Subrecipe Tests
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GOOSE_PROVIDER: anthropic
+          GOOSE_MODEL: claude-sonnet-4-5-20250929
+          HOME: /tmp/goose-home
+          GOOSE_DISABLE_KEYRING: 1
+          SKIP_BUILD: 1
+        run: |
+          bash scripts/test_subrecipes.sh
diff --git a/scripts/test-subrecipes-examples/subrecipes/activity-suggestions.yaml b/scripts/test-subrecipes-examples/subrecipes/activity-suggestions.yaml
new file mode 100644
index 000000000000..9a92b1a1a715
--- /dev/null
+++ b/scripts/test-subrecipes-examples/subrecipes/activity-suggestions.yaml
@@ -0,0 +1,23 @@
+version: "1.0.0"
+title: "Activity Recommender"
+description: "Suggest activities based on weather conditions"
+instructions: |
+  You are a travel expert. Recommend appropriate activities and attractions
+  based on current weather conditions.
+
+parameters:
+  - key: weather_conditions
+    input_type: string
+    requirement: required
+    description: "Current weather conditions to base recommendations on"
+
+extensions:
+  - type: builtin
+    name: developer
+    timeout: 300
+    bundled: true
+
+prompt: |
+  Based on these weather conditions: {{ weather_conditions }},
+  suggest appropriate activities, attractions, and travel tips.
+  Include both indoor and outdoor options as relevant.
diff --git a/scripts/test-subrecipes-examples/subrecipes/weather-data.yaml b/scripts/test-subrecipes-examples/subrecipes/weather-data.yaml
new file mode 100644
index 000000000000..6efb9ce510c7
--- /dev/null
+++ b/scripts/test-subrecipes-examples/subrecipes/weather-data.yaml
@@ -0,0 +1,23 @@
+version: "1.0.0"
+title: "Weather Data Collector"
+description: "Fetch current weather conditions for a location"
+instructions: |
+  You are a weather data specialist. Gather current weather information
+  including temperature, conditions, and seasonal context.
+
+parameters:
+  - key: location
+    input_type: string
+    requirement: required
+    description: "City or location to get weather data for"
+
+extensions:
+  - type: builtin
+    name: developer
+    timeout: 300
+    bundled: true
+
+prompt: |
+  Get the current weather conditions for {{ location }}.
+  Include temperature, weather conditions (sunny, rainy, etc.),
+  and any relevant seasonal information.
diff --git a/scripts/test-subrecipes-examples/travel_planner.yaml b/scripts/test-subrecipes-examples/travel_planner.yaml
new file mode 100644
index 000000000000..1f48b11878f4
--- /dev/null
+++ b/scripts/test-subrecipes-examples/travel_planner.yaml
@@ -0,0 +1,23 @@
+version: "1.0.0"
+title: "Travel Activity Planner"
+description: "Get weather data and suggest appropriate activities"
+instructions: |
+  Plan activities by first getting weather data, then suggesting activities based on conditions.
+
+prompt: |
+  Plan activities for Sydney by first getting weather data, then suggesting activities based on the weather conditions we receive.
+
+sub_recipes:
+  - name: weather_data
+    path: "{{ recipe_dir }}/subrecipes/weather-data.yaml"
+    # No values - location parameter comes from prompt context
+
+  - name: activity_suggestions
+    path: "{{ recipe_dir }}/subrecipes/activity-suggestions.yaml"
+    # weather_conditions parameter comes from conversation context
+
+extensions:
+  - type: builtin
+    name: developer
+    timeout: 300
+    bundled: true
diff --git a/scripts/test-subrecipes-examples/travel_planner_parallel.yaml b/scripts/test-subrecipes-examples/travel_planner_parallel.yaml
new file mode 100644
index 000000000000..bb1ee1a03717
--- /dev/null
+++ b/scripts/test-subrecipes-examples/travel_planner_parallel.yaml
@@ -0,0 +1,23 @@
+version: "1.0.0"
+title: "Travel Activity Planner (Parallel)"
+description: "Get weather data and suggest activities in parallel"
+instructions: |
+  Plan activities by getting weather data and activity suggestions in parallel to save time.
+
+prompt: |
+  Run the following subrecipes in parallel to plan activities for Sydney:
+  - use weather_data subrecipe to get the weather for Sydney
+  - use activity_suggestions subrecipe to suggest activities for overcast, cool weather
+
+sub_recipes:
+  - name: weather_data
+    path: "{{ recipe_dir }}/subrecipes/weather-data.yaml"
+
+  - name: activity_suggestions
+    path: "{{ recipe_dir }}/subrecipes/activity-suggestions.yaml"
+
+extensions:
+  - type: builtin
+    name: developer
+    timeout: 300
+    bundled: true
diff --git a/scripts/test_subrecipes.sh b/scripts/test_subrecipes.sh
new file mode 100755
index 000000000000..29920f2e2c0c
--- /dev/null
+++ b/scripts/test_subrecipes.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# Smoke test for goose subrecipes. Runs the example travel-planner recipes and
+# greps the captured output for signs that subrecipes and subagents were invoked.
+# Must be started from the repository root: the goose binary and the example
+# recipes are both resolved relative to the current directory.
+#
+# pipefail matters here: each recipe run is piped through `tee`, so without it
+# the `if` around the pipeline would see tee's exit status instead of goose's
+# and a failing recipe would be reported as a success.
+set -eo pipefail
+
+if [ -f .env ]; then
+  export $(grep -v '^#' .env | xargs)
+fi
+
+if [ -z "$SKIP_BUILD" ]; then
+  echo "Building goose..."
+  cargo build --release --bin goose
+  echo ""
+else
+  echo "Skipping build (SKIP_BUILD is set)..."
+  echo ""
+fi
+
+SCRIPT_DIR=$(pwd)
+
+# Add goose binary to PATH so subagents can find it when spawning
+export PATH="$SCRIPT_DIR/target/release:$PATH"
+
+# Set default provider and model if not already set
+export GOOSE_PROVIDER="${GOOSE_PROVIDER:-anthropic}"
+export GOOSE_MODEL="${GOOSE_MODEL:-claude-sonnet-4-5-20250929}"
+
+echo "Using provider: $GOOSE_PROVIDER"
+echo "Using model: $GOOSE_MODEL"
+echo ""
+
+TESTDIR=$(mktemp -d)
+echo "Created test directory: $TESTDIR"
+
+cp -r "$SCRIPT_DIR/scripts/test-subrecipes-examples/"* "$TESTDIR/"
+echo "Copied test recipes from scripts/test-subrecipes-examples"
+
+echo ""
+echo "=== Testing Subrecipe Workflow ==="
+echo "Recipe: $TESTDIR/travel_planner.yaml"
+echo ""
+
+RESULTS=()
+
+# Scan captured goose output for the expected markers and append one
+# "✓ ..." or "✗ ..." entry per check to RESULTS.
+#   $1 - file holding the captured output of a recipe run
+#   $2 - label for the run, used in the RESULTS entries
+check_recipe_output() {
+  local tmpfile=$1
+  local mode=$2
+
+  if grep -q "| subrecipe" "$tmpfile"; then
+    echo "✓ SUCCESS: Subrecipe tools invoked"
+    RESULTS+=("✓ Subrecipe tool invocation ($mode)")
+  else
+    echo "✗ FAILED: No evidence of subrecipe tool invocation"
+    RESULTS+=("✗ Subrecipe tool invocation ($mode)")
+  fi
+
+  if grep -q "weather_data" "$tmpfile" && grep -q "activity_suggestions" "$tmpfile"; then
+    echo "✓ SUCCESS: Both subrecipes (weather_data, activity_suggestions) found in output"
+    RESULTS+=("✓ Both subrecipes present ($mode)")
+  else
+    echo "✗ FAILED: Not all subrecipes found in output"
+    RESULTS+=("✗ Subrecipe names ($mode)")
+  fi
+
+  if grep -q "| subagent" "$tmpfile"; then
+    echo "✓ SUCCESS: Subagent execution detected"
+    RESULTS+=("✓ Subagent execution ($mode)")
+  else
+    echo "✗ FAILED: No evidence of subagent execution"
+    RESULTS+=("✗ Subagent execution ($mode)")
+  fi
+}
+
+echo "Test 1: Running recipe with session..."
+TMPFILE=$(mktemp)
+if (cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --recipe travel_planner.yaml 2>&1) | tee "$TMPFILE"; then
+  echo "✓ SUCCESS: Recipe completed successfully"
+  RESULTS+=("✓ Recipe exit code (with session)")
+  check_recipe_output "$TMPFILE" "with session"
+else
+  echo "✗ FAILED: Recipe execution failed"
+  RESULTS+=("✗ Recipe exit code (with session)")
+fi
+rm "$TMPFILE"
+echo ""
+
+echo "Test 2: Running recipe in --no-session mode..."
+TMPFILE=$(mktemp)
+if (cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --recipe travel_planner.yaml --no-session 2>&1) | tee "$TMPFILE"; then
+  echo "✓ SUCCESS: Recipe completed successfully"
+  RESULTS+=("✓ Recipe exit code (--no-session)")
+  check_recipe_output "$TMPFILE" "--no-session"
+else
+  echo "✗ FAILED: Recipe execution failed"
+  RESULTS+=("✗ Recipe exit code (--no-session)")
+fi
+rm "$TMPFILE"
+echo ""
+
+echo "Test 3: Running recipe with parallel subrecipes..."
+TMPFILE=$(mktemp)
+if (cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --recipe travel_planner_parallel.yaml 2>&1) | tee "$TMPFILE"; then
+  echo "✓ SUCCESS: Recipe completed successfully"
+  RESULTS+=("✓ Recipe exit code (parallel)")
+  check_recipe_output "$TMPFILE" "parallel"
+
+  if grep -q "execution_mode: parallel" "$TMPFILE"; then
+    echo "✓ SUCCESS: Parallel execution mode detected"
+    RESULTS+=("✓ Parallel execution mode")
+  else
+    echo "✗ FAILED: Parallel execution mode not detected"
+    RESULTS+=("✗ Parallel execution mode")
+  fi
+else
+  echo "✗ FAILED: Recipe execution failed"
+  RESULTS+=("✗ Recipe exit code (parallel)")
+fi
+rm "$TMPFILE"
+echo ""
+
+rm -rf "$TESTDIR"
+
+echo "=== Test Summary ==="
+FAILED=0
+for result in "${RESULTS[@]}"; do
+  echo "$result"
+  # Checked in-shell rather than via `echo | grep -q`, so pipefail cannot turn
+  # an early grep exit into a spurious pipeline failure.
+  if [[ "$result" == *"✗"* ]]; then
+    FAILED=1
+  fi
+done
+
+if [ "$FAILED" -ne 0 ]; then
+  echo ""
+  echo "Some tests failed!"
+  exit 1
+else
+  echo ""
+  echo "All tests passed!"
+fi