diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml index e301804b0390..2640db00868c 100644 --- a/.github/workflows/pr-smoke-test.yml +++ b/.github/workflows/pr-smoke-test.yml @@ -153,3 +153,14 @@ jobs: SKIP_BUILD: 1 run: | bash scripts/test_subrecipes.sh + + - name: Run Compaction Tests + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GOOSE_PROVIDER: anthropic + GOOSE_MODEL: claude-sonnet-4-5-20250929 + HOME: /tmp/goose-home + GOOSE_DISABLE_KEYRING: 1 + SKIP_BUILD: 1 + run: | + bash scripts/test_compaction.sh diff --git a/Cargo.lock b/Cargo.lock index b84c46478ed8..0733cd279d40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2610,7 +2610,7 @@ dependencies = [ [[package]] name = "goose" -version = "1.12.0" +version = "1.13.0" dependencies = [ "ahash", "anyhow", @@ -2691,7 +2691,7 @@ dependencies = [ [[package]] name = "goose-bench" -version = "1.12.0" +version = "1.13.0" dependencies = [ "anyhow", "async-trait", @@ -2714,7 +2714,7 @@ dependencies = [ [[package]] name = "goose-cli" -version = "1.12.0" +version = "1.13.0" dependencies = [ "agent-client-protocol", "anstream", @@ -2766,7 +2766,7 @@ dependencies = [ [[package]] name = "goose-mcp" -version = "1.12.0" +version = "1.13.0" dependencies = [ "anyhow", "async-trait", @@ -2832,7 +2832,7 @@ dependencies = [ [[package]] name = "goose-server" -version = "1.12.0" +version = "1.13.0" dependencies = [ "anyhow", "async-trait", @@ -2869,7 +2869,7 @@ dependencies = [ [[package]] name = "goose-test" -version = "1.12.0" +version = "1.13.0" dependencies = [ "clap", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index ca71b1eceae0..15a55c891629 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ resolver = "2" [workspace.package] edition = "2021" -version = "1.12.0" +version = "1.13.0" authors = ["Block "] license = "Apache-2.0" repository = "https://github.com/block/goose" diff --git a/crates/goose-server/src/routes/reply.rs b/crates/goose-server/src/routes/reply.rs index 6c5425d9a04e..5a340fa3d947 100644 --- a/crates/goose-server/src/routes/reply.rs +++ b/crates/goose-server/src/routes/reply.rs @@ -133,6 +133,7 @@ pub enum MessageEvent { }, Finish { reason: String, + token_state: TokenState, }, ModelChange { model: String, @@ -149,6 +150,27 @@ pub enum MessageEvent { Ping, } +async fn get_token_state(session_id: &str) -> TokenState { + SessionManager::get_session(session_id, false) + .await + .map(|session| TokenState { + input_tokens: session.input_tokens.unwrap_or(0), + output_tokens: session.output_tokens.unwrap_or(0), + total_tokens: session.total_tokens.unwrap_or(0), + accumulated_input_tokens: session.accumulated_input_tokens.unwrap_or(0), + accumulated_output_tokens: session.accumulated_output_tokens.unwrap_or(0), + accumulated_total_tokens: session.accumulated_total_tokens.unwrap_or(0), + }) + .inspect_err(|e| { + tracing::warn!( + "Failed to fetch session token state for {}: {}", + session_id, + e + ); + }) + .unwrap_or_default() +} + async fn stream_event( event: MessageEvent, tx: &mpsc::Sender, @@ -321,29 +343,7 @@ pub async fn reply( all_messages.push(message.clone()); - let token_state = match SessionManager::get_session(&session_id, false).await { - Ok(session) => { - TokenState { - input_tokens: session.input_tokens.unwrap_or(0), - output_tokens: session.output_tokens.unwrap_or(0), - total_tokens: session.total_tokens.unwrap_or(0), - accumulated_input_tokens: session.accumulated_input_tokens.unwrap_or(0), - accumulated_output_tokens: session.accumulated_output_tokens.unwrap_or(0), - accumulated_total_tokens: session.accumulated_total_tokens.unwrap_or(0), - } - }, - Err(e) => { - tracing::warn!("Failed to fetch session for token state: {}", e); - TokenState { - input_tokens: 0, - output_tokens: 0, - total_tokens: 0, - accumulated_input_tokens: 0, - accumulated_output_tokens: 0, - accumulated_total_tokens: 0, - } - } - }; + let token_state = get_token_state(&session_id).await; stream_event(MessageEvent::Message { message, token_state }, &tx, &cancel_token).await; } @@ -437,9 +437,12 @@ pub async fn reply( ); } + let final_token_state = get_token_state(&session_id).await; + let _ = stream_event( MessageEvent::Finish { reason: "stop".to_string(), + token_state: final_token_state, }, &task_tx, &cancel_token, diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs index 61dfa73acd1e..0d28297b0608 100644 --- a/crates/goose/src/agents/agent.rs +++ b/crates/goose/src/agents/agent.rs @@ -750,14 +750,14 @@ impl Agent { .clone() .ok_or_else(|| anyhow::anyhow!("Session {} has no conversation", session_config.id))?; - let needs_auto_compact = - crate::context_mgmt::check_if_compaction_needed(self, &conversation, None, &session) + let needs_auto_compact = !is_manual_compact + && crate::context_mgmt::check_if_compaction_needed(self, &conversation, None, &session) .await?; let conversation_to_compact = conversation.clone(); Ok(Box::pin(async_stream::try_stream! { - let final_conversation = if !needs_auto_compact { + let final_conversation = if !needs_auto_compact && !is_manual_compact { conversation } else { if !is_manual_compact { diff --git a/crates/goose/src/conversation/message.rs b/crates/goose/src/conversation/message.rs index cc7d161dd841..2f18d038836f 100644 --- a/crates/goose/src/conversation/message.rs +++ b/crates/goose/src/conversation/message.rs @@ -711,7 +711,7 @@ impl Message { } } -#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct TokenState { pub input_tokens: i32, diff --git a/crates/goose/src/providers/utils.rs b/crates/goose/src/providers/utils.rs index f96b2e1a85d2..1839708fb51c 100644 --- a/crates/goose/src/providers/utils.rs +++ b/crates/goose/src/providers/utils.rs @@ -553,7 +553,7 @@ impl RequestLog { fn finish(&mut self) -> Result<()> { if let Some(mut writer) = self.writer.take() { writer.flush()?; - let logs_dir = crate::logging::prepare_log_directory("llm", true)?; + let logs_dir = Paths::in_state_dir("logs"); let log_path = |i| logs_dir.join(format!("llm_request.{}.jsonl", i)); for i in (0..LOGS_TO_KEEP - 1).rev() { diff --git a/scripts/test_compaction.sh b/scripts/test_compaction.sh new file mode 100755 index 000000000000..f3bf6d430c76 --- /dev/null +++ b/scripts/test_compaction.sh @@ -0,0 +1,242 @@ +#!/bin/bash + +# Compaction smoke test script +# Tests both manual (trigger prompt) and auto compaction (threshold-based) + +if [ -f .env ]; then + export $(grep -v '^#' .env | xargs) +fi + +if [ -z "$SKIP_BUILD" ]; then + echo "Building goose..." + cargo build --release --bin goose + echo "" +else + echo "Skipping build (SKIP_BUILD is set)..." + echo "" +fi + +SCRIPT_DIR=$(pwd) +GOOSE_BIN="$SCRIPT_DIR/target/release/goose" + +# Validation function to check compaction structure in session JSON +validate_compaction() { + local session_id=$1 + local test_name=$2 + + echo "Validating compaction structure for session: $session_id" + + # Export the session to JSON + local session_json=$($GOOSE_BIN session export --format json --session-id "$session_id" 2>&1) + + if [ $? -ne 0 ]; then + echo "✗ FAILED: Could not export session JSON" + echo " Error: $session_json" + return 1 + fi + + if ! command -v jq &> /dev/null; then + echo "⚠ WARNING: jq not available, cannot validate compaction structure" + return 0 + fi + + # Check basic structure + echo "$session_json" | jq -e '.conversation' > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "✗ FAILED: Session JSON missing 'conversation' field" + return 1 + fi + + local message_count=$(echo "$session_json" | jq '.conversation | length' 2>/dev/null) + echo " Session has $message_count messages" + + # Look for a summary message (assistant role with userVisible=false, agentVisible=true) + local has_summary=$(echo "$session_json" | jq '[.conversation[] | select(.role == "assistant" and .metadata.userVisible == false and .metadata.agentVisible == true)] | length > 0' 2>/dev/null) + + if [ "$has_summary" != "true" ]; then + echo "✗ FAILED: No summary message found (expected assistant message with userVisible=false, agentVisible=true)" + return 1 + fi + echo "✓ Found summary message with correct visibility flags" + + # Check for original messages with userVisible=true, agentVisible=false + local has_hidden_originals=$(echo "$session_json" | jq '[.conversation[] | select(.metadata.userVisible == true and .metadata.agentVisible == false)] | length > 0' 2>/dev/null) + + if [ "$has_hidden_originals" != "true" ]; then + echo "⚠ WARNING: No original messages found with userVisible=true, agentVisible=false" + echo " This might be OK if all messages were compacted" + else + echo "✓ Found original messages hidden from agent (userVisible=true, agentVisible=false)" + fi + + # For auto-compaction, check for the preserved user message (userVisible=true, agentVisible=true) + local has_preserved_user=$(echo "$session_json" | jq '[.conversation[] | select(.role == "user" and .metadata.userVisible == true and .metadata.agentVisible == true)] | length > 0' 2>/dev/null) + + if [ "$has_preserved_user" == "true" ]; then + echo "✓ Found preserved user message (userVisible=true, agentVisible=true)" + fi + + echo "✓ SUCCESS: Compaction structure is valid for $test_name" + return 0 +} + +echo "==================================================" +echo "COMPACTION SMOKE TESTS" +echo "==================================================" +echo "" + +# Check if jq is available +if ! command -v jq &> /dev/null; then + echo "⚠ WARNING: jq is not installed. Compaction structure validation will be limited." + echo " Install jq to enable full validation: brew install jq (macOS) or apt-get install jq (Linux)" + echo "" +fi + +RESULTS=() + +# ================================================== +# TEST 1: Manual Compaction +# ================================================== +echo "---------------------------------------------------" +echo "TEST 1: Manual Compaction via trigger prompt" +echo "---------------------------------------------------" + +TESTDIR=$(mktemp -d) +echo "hello world" > "$TESTDIR/hello.txt" +echo "Test directory: $TESTDIR" +echo "" + +OUTPUT=$(mktemp) + +echo "Step 1: Creating session with initial messages..." +(cd "$TESTDIR" && "$GOOSE_BIN" run --text "list files and read hello.txt" 2>&1) | tee "$OUTPUT" + +if ! command -v jq &> /dev/null; then + echo "✗ FAILED: jq is required for this test" + RESULTS+=("✗ Manual Compaction (jq required)") + rm -f "$OUTPUT" + rm -rf "$TESTDIR" +else + SESSION_ID=$("$GOOSE_BIN" session list --format json 2>/dev/null | jq -r '.[0].id' 2>/dev/null) + + if [ -z "$SESSION_ID" ] || [ "$SESSION_ID" = "null" ]; then + echo "✗ FAILED: Could not create session" + RESULTS+=("✗ Manual Compaction (no session)") + else + echo "" + echo "Session created: $SESSION_ID" + echo "Step 2: Sending manual compaction trigger..." + + # Send the manual compact trigger prompt + (cd "$TESTDIR" && "$GOOSE_BIN" run --resume --session-id "$SESSION_ID" --text "Please compact this conversation" 2>&1) | tee -a "$OUTPUT" + + echo "" + echo "Checking for compaction evidence..." + + if grep -qi "compacting\|compacted\|compaction" "$OUTPUT"; then + echo "✓ SUCCESS: Manual compaction was triggered" + + if validate_compaction "$SESSION_ID" "manual compaction"; then + RESULTS+=("✓ Manual Compaction") + else + RESULTS+=("✗ Manual Compaction (structure validation failed)") + fi + else + echo "✗ FAILED: Manual compaction was not triggered" + RESULTS+=("✗ Manual Compaction") + fi + fi + + rm -f "$OUTPUT" + rm -rf "$TESTDIR" +fi + +echo "" +echo "" + +# ================================================== +# TEST 2: Auto Compaction +# ================================================== +echo "---------------------------------------------------" +echo "TEST 2: Auto Compaction via threshold (0.01)" +echo "---------------------------------------------------" + +TESTDIR=$(mktemp -d) +echo "test content" > "$TESTDIR/test.txt" +echo "Test directory: $TESTDIR" +echo "" + +# Set auto-compact threshold very low (1%) to trigger it quickly +export GOOSE_AUTO_COMPACT_THRESHOLD=0.01 + +OUTPUT=$(mktemp) + +echo "Step 1: Creating session with first message..." +(cd "$TESTDIR" && "$GOOSE_BIN" run --text "hello" 2>&1) | tee "$OUTPUT" + +if ! command -v jq &> /dev/null; then + echo "✗ FAILED: jq is required for this test" + RESULTS+=("✗ Auto Compaction (jq required)") +else + SESSION_ID=$("$GOOSE_BIN" session list --format json 2>/dev/null | jq -r '.[0].id' 2>/dev/null) + + if [ -z "$SESSION_ID" ] || [ "$SESSION_ID" = "null" ]; then + echo "✗ FAILED: Could not create session" + RESULTS+=("✗ Auto Compaction (no session)") + else + echo "" + echo "Session created: $SESSION_ID" + echo "Step 2: Sending second message (should trigger auto-compact)..." + + # Send second message - auto-compaction should trigger before processing this + (cd "$TESTDIR" && "$GOOSE_BIN" run --resume --session-id "$SESSION_ID" --text "hi again" 2>&1) | tee -a "$OUTPUT" + + echo "" + echo "Checking for auto-compaction evidence..." + + if grep -qi "auto.*compact\|exceeded.*auto.*compact.*threshold" "$OUTPUT"; then + echo "✓ SUCCESS: Auto compaction was triggered" + + if validate_compaction "$SESSION_ID" "auto compaction"; then + RESULTS+=("✓ Auto Compaction") + else + RESULTS+=("✗ Auto Compaction (structure validation failed)") + fi + else + echo "✗ FAILED: Auto compaction was not triggered" + echo " Expected to see auto-compact messages with threshold of 0.01" + RESULTS+=("✗ Auto Compaction") + fi + fi +fi + +# Unset the env variable +unset GOOSE_AUTO_COMPACT_THRESHOLD + +rm -f "$OUTPUT" +rm -rf "$TESTDIR" + +echo "" +echo "" + +# ================================================== +# Summary +# ================================================== +echo "==================================================" +echo "TEST SUMMARY" +echo "==================================================" +for result in "${RESULTS[@]}"; do + echo "$result" +done + +# Count results +FAILURE_COUNT=$(echo "${RESULTS[@]}" | grep -o "✗" | wc -l | tr -d ' ') + +if [ "$FAILURE_COUNT" -gt 0 ]; then + echo "" + echo "❌ $FAILURE_COUNT test(s) failed!" + exit 1 +else + echo "" + echo "✅ All tests passed!" +fi diff --git a/ui/desktop/openapi.json b/ui/desktop/openapi.json index d7412d5bf626..216aa9f0cd1e 100644 --- a/ui/desktop/openapi.json +++ b/ui/desktop/openapi.json @@ -10,7 +10,7 @@ "license": { "name": "Apache-2.0" }, - "version": "1.12.0" + "version": "1.13.0" }, "paths": { "/agent/add_extension": { @@ -3321,12 +3321,16 @@ "type": "object", "required": [ "reason", + "token_state", "type" ], "properties": { "reason": { "type": "string" }, + "token_state": { + "$ref": "#/components/schemas/TokenState" + }, "type": { "type": "string", "enum": [ diff --git a/ui/desktop/package-lock.json b/ui/desktop/package-lock.json index 224643d1b06b..897a31bdae91 100644 --- a/ui/desktop/package-lock.json +++ b/ui/desktop/package-lock.json @@ -1,12 +1,12 @@ { "name": "goose-app", - "version": "1.12.0", + "version": "1.13.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "goose-app", - "version": "1.12.0", + "version": "1.13.0", "license": "Apache-2.0", "dependencies": { "@ai-sdk/openai": "^2.0.52", diff --git a/ui/desktop/package.json b/ui/desktop/package.json index 386baae08904..8952fce455a1 100644 --- a/ui/desktop/package.json +++ b/ui/desktop/package.json @@ -1,7 +1,7 @@ { "name": "goose-app", "productName": "Goose", - "version": "1.12.0", + "version": "1.13.0", "description": "Goose App", "engines": { "node": "^22.17.1" diff --git a/ui/desktop/src/api/types.gen.ts b/ui/desktop/src/api/types.gen.ts index 6468fd3ee3cb..098489445a7b 100644 --- a/ui/desktop/src/api/types.gen.ts +++ b/ui/desktop/src/api/types.gen.ts @@ -374,6 +374,7 @@ export type MessageEvent = { type: 'Error'; } | { reason: string; + token_state: TokenState; type: 'Finish'; } | { mode: string; diff --git a/ui/desktop/src/components/BaseChat.tsx b/ui/desktop/src/components/BaseChat.tsx index 67090686db2c..fa96740f064c 100644 --- a/ui/desktop/src/components/BaseChat.tsx +++ b/ui/desktop/src/components/BaseChat.tsx @@ -443,12 +443,12 @@ function BaseChatContent({ commandHistory={commandHistory} initialValue={input || ''} setView={setView} - totalTokens={tokenState?.totalTokens ?? sessionTokenCount} + totalTokens={tokenState?.totalTokens || sessionTokenCount} accumulatedInputTokens={ - tokenState?.accumulatedInputTokens ?? sessionInputTokens ?? localInputTokens + tokenState?.accumulatedInputTokens || sessionInputTokens || localInputTokens } accumulatedOutputTokens={ - tokenState?.accumulatedOutputTokens ?? sessionOutputTokens ?? localOutputTokens + tokenState?.accumulatedOutputTokens || sessionOutputTokens || localOutputTokens } droppedFiles={droppedFiles} onFilesProcessed={() => setDroppedFiles([])} // Clear dropped files after processing diff --git a/ui/desktop/src/hooks/useChatEngine.ts b/ui/desktop/src/hooks/useChatEngine.ts index 7c60038e6316..19a333aefda9 100644 --- a/ui/desktop/src/hooks/useChatEngine.ts +++ b/ui/desktop/src/hooks/useChatEngine.ts @@ -213,7 +213,6 @@ export const useChatEngine = ({ // Update token counts when session changes from the message stream useEffect(() => { - console.log('Session received:', session); if (session) { setSessionTokenCount(session.total_tokens || 0); setSessionInputTokens(session.accumulated_input_tokens || 0); diff --git a/ui/desktop/src/hooks/useMessageStream.ts b/ui/desktop/src/hooks/useMessageStream.ts index 2eb98835935b..4e36c7d9ec76 100644 --- a/ui/desktop/src/hooks/useMessageStream.ts +++ b/ui/desktop/src/hooks/useMessageStream.ts @@ -37,7 +37,7 @@ export interface NotificationEvent { type MessageEvent = | { type: 'Message'; message: Message; token_state: TokenState } | { type: 'Error'; error: string } - | { type: 'Finish'; reason: string } + | { type: 'Finish'; reason: string; token_state: TokenState } | { type: 'ModelChange'; model: string; mode: string } | { type: 'UpdateConversation'; conversation: Conversation } | NotificationEvent; @@ -368,6 +368,8 @@ export function useMessageStream({ } case 'Finish': { + setTokenState(parsedEvent.token_state); + if (onFinish && currentMessages.length > 0) { const lastMessage = currentMessages[currentMessages.length - 1]; onFinish(lastMessage, parsedEvent.reason);