Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/workflows/pr-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,14 @@ jobs:
SKIP_BUILD: 1
run: |
bash scripts/test_subrecipes.sh

- name: Run Compaction Tests
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GOOSE_PROVIDER: anthropic
GOOSE_MODEL: claude-sonnet-4-5-20250929
HOME: /tmp/goose-home
GOOSE_DISABLE_KEYRING: 1
SKIP_BUILD: 1
run: |
bash scripts/test_compaction.sh
6 changes: 3 additions & 3 deletions crates/goose/src/agents/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -750,14 +750,14 @@ impl Agent {
.clone()
.ok_or_else(|| anyhow::anyhow!("Session {} has no conversation", session_config.id))?;

let needs_auto_compact =
crate::context_mgmt::check_if_compaction_needed(self, &conversation, None, &session)
let needs_auto_compact = !is_manual_compact
&& crate::context_mgmt::check_if_compaction_needed(self, &conversation, None, &session)
.await?;

let conversation_to_compact = conversation.clone();

Ok(Box::pin(async_stream::try_stream! {
let final_conversation = if !needs_auto_compact {
let final_conversation = if !needs_auto_compact && !is_manual_compact {
Copy link

Copilot AI Nov 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition !needs_auto_compact && !is_manual_compact is logically equivalent to !(needs_auto_compact || is_manual_compact), which can be simplified. However, given that needs_auto_compact already incorporates the !is_manual_compact check (line 753), this condition could be simplified to just !needs_auto_compact since when is_manual_compact is true, needs_auto_compact will always be false.

Suggested change
let final_conversation = if !needs_auto_compact && !is_manual_compact {
let final_conversation = if !needs_auto_compact {

Copilot uses AI. Check for mistakes.
conversation
} else {
if !is_manual_compact {
Expand Down
242 changes: 242 additions & 0 deletions scripts/test_compaction.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#!/bin/bash

# Compaction smoke test script
# Tests both manual (trigger prompt) and auto compaction (threshold-based)

# Load variables from a .env file in the current directory, when one exists.
# Lines beginning with '#' are filtered out; the remaining lines are flattened
# by xargs and handed to `export` through an unquoted command substitution, so
# the shell word-splits and glob-expands the result before exporting it.
# NOTE(review): values containing whitespace or glob characters will not be
# exported intact — confirm .env only holds simple KEY=value pairs.
if [ -f .env ]; then
export $(grep -v '^#' .env | xargs)
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The export $(grep -v '^#' .env | xargs) command is vulnerable to command injection if the .env file contains malicious content. Consider using a safer approach like set -a; source .env; set +a or iterate through the file with proper validation of variable names and values.

Suggested change
export $(grep -v '^#' .env | xargs)
set -a; source .env; set +a

Copilot uses AI. Check for mistakes.
fi

# Build the release binary unless the caller opted out by setting SKIP_BUILD
# to any non-empty value. Both paths end with a blank separator line.
if [[ -n "$SKIP_BUILD" ]]; then
  echo "Skipping build (SKIP_BUILD is set)..."
else
  echo "Building goose..."
  cargo build --release --bin goose
fi
echo ""

# SCRIPT_DIR is the current working directory (not the location of this
# script file); the goose binary is looked up relative to it.
SCRIPT_DIR=$(pwd)
GOOSE_BIN="${SCRIPT_DIR}/target/release/goose"
# Validation function to check compaction structure in session JSON
#######################################
# Validate that an exported session shows the expected compaction structure.
#
# Exports the session as JSON and checks, with jq, that:
#   - a top-level 'conversation' field exists (hard failure if missing),
#   - at least one assistant message has userVisible=false and
#     agentVisible=true, i.e. the summary (hard failure if missing),
#   - at least one message has userVisible=true and agentVisible=false
#     (warning only),
#   - optionally, a user message visible to both user and agent
#     (informational only).
#
# Globals:   GOOSE_BIN (read) - command used to export the session
# Arguments: $1 - id of the session to export
#            $2 - human-readable test name used in the success message
# Outputs:   progress, warning and result lines on stdout
# Returns:   0 if the structure is valid, or if jq is not installed and
#            validation is therefore skipped; 1 on export failure or when a
#            required element is missing
#######################################
validate_compaction() {
  local session_id=$1
  local test_name=$2
  local session_json message_count
  local has_summary has_hidden_originals has_preserved_user

  echo "Validating compaction structure for session: $session_id"

  # Export the session to JSON. The assignment is kept separate from the
  # `local` declaration: `local var=$(cmd)` reports the status of `local`
  # (always 0), which would make this failure branch unreachable.
  # stderr is captured too so it can be shown in the error message.
  if ! session_json=$("$GOOSE_BIN" session export --format json --session-id "$session_id" 2>&1); then
    echo "✗ FAILED: Could not export session JSON"
    echo " Error: $session_json"
    return 1
  fi

  # Without jq the structure cannot be inspected; treat that as a skip.
  if ! command -v jq > /dev/null 2>&1; then
    echo "⚠ WARNING: jq not available, cannot validate compaction structure"
    return 0
  fi

  # Check basic structure
  if ! jq -e '.conversation' > /dev/null 2>&1 <<< "$session_json"; then
    echo "✗ FAILED: Session JSON missing 'conversation' field"
    return 1
  fi

  message_count=$(jq '.conversation | length' 2>/dev/null <<< "$session_json")
  echo " Session has $message_count messages"

  # Look for a summary message (assistant role with userVisible=false, agentVisible=true)
  has_summary=$(jq '[.conversation[] | select(.role == "assistant" and .metadata.userVisible == false and .metadata.agentVisible == true)] | length > 0' 2>/dev/null <<< "$session_json")

  if [[ "$has_summary" != "true" ]]; then
    echo "✗ FAILED: No summary message found (expected assistant message with userVisible=false, agentVisible=true)"
    return 1
  fi
  echo "✓ Found summary message with correct visibility flags"

  # Check for original messages with userVisible=true, agentVisible=false.
  # Their absence is only a warning, not a failure.
  has_hidden_originals=$(jq '[.conversation[] | select(.metadata.userVisible == true and .metadata.agentVisible == false)] | length > 0' 2>/dev/null <<< "$session_json")

  if [[ "$has_hidden_originals" != "true" ]]; then
    echo "⚠ WARNING: No original messages found with userVisible=true, agentVisible=false"
    echo " This might be OK if all messages were compacted"
  else
    echo "✓ Found original messages hidden from agent (userVisible=true, agentVisible=false)"
  fi

  # For auto-compaction, check for the preserved user message
  # (userVisible=true, agentVisible=true). Purely informational.
  has_preserved_user=$(jq '[.conversation[] | select(.role == "user" and .metadata.userVisible == true and .metadata.agentVisible == true)] | length > 0' 2>/dev/null <<< "$session_json")

  if [[ "$has_preserved_user" == "true" ]]; then
    echo "✓ Found preserved user message (userVisible=true, agentVisible=true)"
  fi

  echo "✓ SUCCESS: Compaction structure is valid for $test_name"
  return 0
}

# Banner for the whole run, followed by a blank separator line.
printf '%s\n' \
  "==================================================" \
  "COMPACTION SMOKE TESTS" \
  "==================================================" \
  ""

# Warn up front when jq is missing; the tests below depend on it.
command -v jq > /dev/null 2>&1 || {
  echo "⚠ WARNING: jq is not installed. Compaction structure validation will be limited."
  echo " Install jq to enable full validation: brew install jq (macOS) or apt-get install jq (Linux)"
  echo ""
}

# One human-readable line per test outcome; printed and counted in the summary.
RESULTS=()

# ==================================================
# TEST 1: Manual Compaction
# ==================================================
# Creates a session inside a scratch directory, resumes it with a prompt that
# asks for compaction, then looks for compaction wording in the combined
# output and validates the exported session structure. Exactly one entry is
# appended to RESULTS on every path.
printf '%s\n' \
  "---------------------------------------------------" \
  "TEST 1: Manual Compaction via trigger prompt" \
  "---------------------------------------------------"

TESTDIR=$(mktemp -d)
echo "hello world" > "$TESTDIR/hello.txt"
echo "Test directory: $TESTDIR"
echo ""

# Collects the output of both goose invocations for the grep check below.
OUTPUT=$(mktemp)

echo "Step 1: Creating session with initial messages..."
(cd "$TESTDIR" && "$GOOSE_BIN" run --text "list files and read hello.txt" 2>&1) | tee "$OUTPUT"

if command -v jq > /dev/null 2>&1; then
  # Takes the first entry of the session list as the session just created.
  # NOTE(review): assumes the list is ordered newest-first — confirm.
  SESSION_ID=$("$GOOSE_BIN" session list --format json 2>/dev/null | jq -r '.[0].id' 2>/dev/null)

  case "$SESSION_ID" in
    "" | "null")
      echo "✗ FAILED: Could not create session"
      RESULTS+=("✗ Manual Compaction (no session)")
      ;;
    *)
      echo ""
      echo "Session created: $SESSION_ID"
      echo "Step 2: Sending manual compaction trigger..."

      # Send the manual compact trigger prompt
      (cd "$TESTDIR" && "$GOOSE_BIN" run --resume --session-id "$SESSION_ID" --text "Please compact this conversation" 2>&1) | tee -a "$OUTPUT"

      echo ""
      echo "Checking for compaction evidence..."

      if ! grep -qi "compacting\|compacted\|compaction" "$OUTPUT"; then
        echo "✗ FAILED: Manual compaction was not triggered"
        RESULTS+=("✗ Manual Compaction")
      else
        echo "✓ SUCCESS: Manual compaction was triggered"

        if validate_compaction "$SESSION_ID" "manual compaction"; then
          RESULTS+=("✓ Manual Compaction")
        else
          RESULTS+=("✗ Manual Compaction (structure validation failed)")
        fi
      fi
      ;;
  esac
else
  echo "✗ FAILED: jq is required for this test"
  RESULTS+=("✗ Manual Compaction (jq required)")
fi

# Scratch files are removed on every path.
rm -f "$OUTPUT"
rm -rf "$TESTDIR"

echo ""
echo ""

# ==================================================
# TEST 2: Auto Compaction
# ==================================================
# Exports a very low GOOSE_AUTO_COMPACT_THRESHOLD, creates a session, resumes
# it with a second message, then looks for auto-compaction wording in the
# combined output and validates the exported session structure. Exactly one
# entry is appended to RESULTS on every path.
printf '%s\n' \
  "---------------------------------------------------" \
  "TEST 2: Auto Compaction via threshold (0.01)" \
  "---------------------------------------------------"

TESTDIR=$(mktemp -d)
echo "test content" > "$TESTDIR/test.txt"
echo "Test directory: $TESTDIR"
echo ""

# Set auto-compact threshold very low (1%) to trigger it quickly
GOOSE_AUTO_COMPACT_THRESHOLD=0.01
export GOOSE_AUTO_COMPACT_THRESHOLD

# Collects the output of both goose invocations for the grep check below.
OUTPUT=$(mktemp)

echo "Step 1: Creating session with first message..."
(cd "$TESTDIR" && "$GOOSE_BIN" run --text "hello" 2>&1) | tee "$OUTPUT"

if command -v jq > /dev/null 2>&1; then
  # Takes the first entry of the session list as the session just created.
  # NOTE(review): assumes the list is ordered newest-first — confirm.
  SESSION_ID=$("$GOOSE_BIN" session list --format json 2>/dev/null | jq -r '.[0].id' 2>/dev/null)

  case "$SESSION_ID" in
    "" | "null")
      echo "✗ FAILED: Could not create session"
      RESULTS+=("✗ Auto Compaction (no session)")
      ;;
    *)
      echo ""
      echo "Session created: $SESSION_ID"
      echo "Step 2: Sending second message (should trigger auto-compact)..."

      # Send second message - auto-compaction should trigger before processing this
      (cd "$TESTDIR" && "$GOOSE_BIN" run --resume --session-id "$SESSION_ID" --text "hi again" 2>&1) | tee -a "$OUTPUT"

      echo ""
      echo "Checking for auto-compaction evidence..."

      if ! grep -qi "auto.*compact\|exceeded.*auto.*compact.*threshold" "$OUTPUT"; then
        echo "✗ FAILED: Auto compaction was not triggered"
        echo " Expected to see auto-compact messages with threshold of 0.01"
        RESULTS+=("✗ Auto Compaction")
      else
        echo "✓ SUCCESS: Auto compaction was triggered"

        if validate_compaction "$SESSION_ID" "auto compaction"; then
          RESULTS+=("✓ Auto Compaction")
        else
          RESULTS+=("✗ Auto Compaction (structure validation failed)")
        fi
      fi
      ;;
  esac
else
  echo "✗ FAILED: jq is required for this test"
  RESULTS+=("✗ Auto Compaction (jq required)")
fi

# Unset the env variable
unset GOOSE_AUTO_COMPACT_THRESHOLD

# Scratch files are removed on every path.
rm -f "$OUTPUT"
rm -rf "$TESTDIR"

echo ""
echo ""

# ==================================================
# Summary
# ==================================================
# Prints every recorded result, then exits with status 1 when at least one
# result carries the failure marker.
printf '%s\n' \
  "==================================================" \
  "TEST SUMMARY" \
  "=================================================="
for outcome in "${RESULTS[@]}"; do
  echo "$outcome"
done

# Count results: every occurrence of the failure marker across all entries.
FAILURE_COUNT=$(grep -o "✗" <<< "${RESULTS[*]}" | wc -l | tr -d ' ')

echo ""
if (( FAILURE_COUNT > 0 )); then
  echo "❌ $FAILURE_COUNT test(s) failed!"
  exit 1
else
  echo "✅ All tests passed!"
fi